library(readxl)
# Read the second sheet of the PWH workbook (first row skipped) and derive
# three categorical flags from the raw scores, keeping only the columns that
# are used downstream.
PWH_data <- read_excel(
  "C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Dataset/Final spreadsheet_controls and PWH.xlsx",
  sheet = 2,
  skip = 1
) %>%
  mutate(
    # Band mcass_total into 0-10 / 11-20 / 21-30 / >30; any value outside
    # those bands (including missing) becomes NA.
    MCASS_total = case_when(
      mcass_total >= 0 & mcass_total <= 10 ~ "Normal/Equivocal",
      mcass_total >= 11 & mcass_total <= 20 ~ "Mild AN",
      mcass_total >= 21 & mcass_total <= 30 ~ "Moderate AN",
      mcass_total > 30 ~ "Severe AN",
      TRUE ~ NA
    ),
    # cass_total of 3 or more is flagged "Yes"; missing scores stay NA.
    Autonomic_Neuropathy = case_when(
      cass_total >= 3 ~ "Yes",
      cass_total < 3 ~ "No",
      TRUE ~ NA
    ),
    # The SBTT column is coerced to numeric before comparing with 360.
    Prolonged_SBTT = case_when(
      as.numeric(SBTT...34) > 360 ~ "Yes",
      as.numeric(SBTT...34) <= 360 ~ "No",
      TRUE ~ NA
    )
  ) %>%
  select(
    ID, Age, Sex,
    MCASS_total, Autonomic_Neuropathy,
    SBTT...34, Prolonged_SBTT
  )

# Read the EVA control spreadsheet (first row skipped, blanks and "NA" read as
# missing), drop the three excluded SITE groups, and derive the same
# categorical flags as for the PWH data.
Control_data_eva <- read.csv(
  "C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 9. EVA new study - GI Motility in HIV/Final spreadsheet_controls_Updated_9.3.24.csv",
  skip = 1,
  header = TRUE,
  na.strings = c("", "NA")
) %>%
  filter(
    !(SITE %in% c(
      "EVA control group",
      "Male participants from first three batches (historical control)",
      "Over 65 years of age (historical controls)"
    ))
  ) %>%
  mutate(
    AGE = as.numeric(AGE),
    # Band mcass_total into 0-10 / 11-20 / 21-30 / >30; anything else is NA.
    MCASS_total = case_when(
      mcass_total >= 0 & mcass_total <= 10 ~ "Normal/Equivocal",
      mcass_total >= 11 & mcass_total <= 20 ~ "Mild AN",
      mcass_total >= 21 & mcass_total <= 30 ~ "Moderate AN",
      mcass_total > 30 ~ "Severe AN",
      TRUE ~ NA
    ),
    Autonomic_Neuropathy = case_when(
      cass_total >= 3 ~ "Yes",
      cass_total < 3 ~ "No",
      TRUE ~ NA
    ),
    Prolonged_SBTT = case_when(
      SBTT > 360 ~ "Yes",
      SBTT <= 360 ~ "No",
      TRUE ~ NA
    )
  ) %>%
  # Keep the analysis columns, renaming AGE/SEX on the way out.
  select(
    SUBJID,
    Age = AGE,
    Sex = SEX,
    MCASS_total, Autonomic_Neuropathy, Prolonged_SBTT
  )

# Read the historical-control demographics and reduce them to a numeric
# combined ID, age, sex label and the prolonged-SBTT flag.
Control_data_jack <- read_excel(
  "C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Dataset/Copy of Demographic info_historical controls.xlsx"
) %>%
  mutate(
    AGE = as.numeric(AGE),
    # SITEN and SUBJID are pasted together and converted to a number; IDs that
    # cannot be converted become NA and are removed below.
    combined_ID = as.numeric(paste0(SITEN, SUBJID)),
    # SEXO codes: 1 -> "Male", 2 -> "Female", anything else -> NA.
    SEXO = case_when(
      SEXO == 1 ~ "Male",
      SEXO == 2 ~ "Female",
      TRUE ~ NA
    ),
    Prolonged_SBTT = case_when(
      SBTT > 360 ~ "Yes",
      SBTT <= 360 ~ "No",
      TRUE ~ NA
    )
  ) %>%
  filter(!is.na(combined_ID)) %>%
  select(combined_ID, Age = AGE, Sex = SEXO, Prolonged_SBTT)

1 Jack’s Data Analyses

# Load the 5-minute epoch table for the control patients and derive, in one
# pass, the per-subject epoch counts/frequencies and the per-epoch time and
# motility variables used in the rest of section 1.
Control_Patients_5min <- read.csv(
  "C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Verified Dataset/Control_Patients_5min.csv"
) %>%
  filter(!is.na(epoch_type)) %>%
  # Per-subject counts of each epoch type (computed before the window and
  # prefiltration filters below, as in the original order of operations).
  group_by(combined_ID) %>%
  mutate(
    number_of_fast_epochs = sum(epoch_type == "fast epoch"),
    number_of_medium_epochs = sum(epoch_type == "medium epoch"),
    number_of_slow_epochs = sum(epoch_type == "slow epoch")
  ) %>%
  ungroup() %>%
  mutate(
    number_of_total_epochs = rowSums(across(c(
      number_of_fast_epochs,
      number_of_medium_epochs,
      number_of_slow_epochs
    ))),
    # Counts divided by total_no_missing_time.
    number_of_fast_epochs_frequency = number_of_fast_epochs / total_no_missing_time,
    number_of_medium_epochs_frequency = number_of_medium_epochs / total_no_missing_time,
    number_of_slow_epochs_frequency = number_of_slow_epochs / total_no_missing_time,
    number_of_total_epochs_frequency = number_of_total_epochs / total_no_missing_time
  ) %>%
  # epoch_type is already free of NA here, so only the two remaining
  # conditions need to be applied.
  filter(full_window_size == "Yes", prefiltration == FALSE) %>%
  mutate(
    time_from_swallow_hour = time_from_swallow_minute / 60,
    time_from_gastric_emptying_hour = time_from_gastric_emptying_minute / 60,
    time_fraction = time_from_gastric_emptying_hour / total_time
  ) %>%
  rename(
    sum_of_amplitude = total_pressure_exposure,
    imputed_sum_of_amplitude = imputed_total_pressure_exposure
  ) %>%
  mutate(
    motility_index = log(sum_of_amplitude * total_number_of_contractions + 1)
  )

1.1 Describe within-subject RMSSD variability for Control Data

Question: Does it seem true that for fast epochs RMSSD is more consistent?

Answer: Yes, RMSSD appears to be more consistent within fast epochs, especially RMSSD Time.

# One row per (subject, epoch type): mean, median and SD of the epoch-level
# RMSSD Time and RMSSD Magnitude, plus the number of epochs contributing.
# SD is NA when a subject has a single epoch of that type.
rmssd_variability_Control_Patients_5min <- Control_Patients_5min %>%
  group_by(combined_ID, epoch_type) %>%
  summarise(
    # Produces mean_/median_/sd_ columns named after the source column with
    # its "epoch_" prefix removed (e.g. mean_RMSSD_Time).
    across(
      c(epoch_RMSSD_Time, epoch_RMSSD_Magnitude),
      list(mean = mean, median = median, sd = sd),
      .names = "{.fn}_{sub('^epoch_', '', .col)}"
    ),
    n_epochs = n(),
    .groups = "drop"
  )
library(ggplot2)
library(gridExtra)

# Build one summary panel: a boxplot (outlier glyphs hidden) overlaid with
# jittered per-ID points of the column `y_col`, split by epoch type.
#
# data    : per-(ID, epoch type) summary table with an `epoch_type` column.
# y_col   : name (string) of the summary column to plot on the y axis.
# title   : plot title.
# y_label : y-axis label.
# y_max   : upper end of the visible y range (the lower end is 0).
#
# The visible range is set with coord_cartesian() rather than ylim(). ylim()
# sets scale limits, so observations outside the range are removed before the
# boxplot statistics are computed. The printed summary table shows a
# median_RMSSD_Time of 101.360 in the slow group, which the previous
# ylim(0, 100) dropped from the panel. coord_cartesian() only zooms the view.
rmssd_summary_boxplot <- function(data, y_col, title, y_label, y_max) {
  ggplot(data, aes(x = epoch_type, y = .data[[y_col]])) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    labs(title = title, x = "Epoch Type", y = y_label) +
    coord_cartesian(ylim = c(0, y_max)) +
    theme(
      text = element_text(size = 14),
      axis.text.x = element_text(size = 14),
      axis.text.y = element_text(size = 14),
      axis.title.x = element_text(size = 18),
      axis.title.y = element_text(size = 18),
      plot.title = element_text(size = 16)
    )
}

p1 <- rmssd_summary_boxplot(
  rmssd_variability_Control_Patients_5min, "sd_RMSSD_Time",
  title = "Standard Deviation of RMSSD Time for each ID by Epoch Type",
  y_label = "SD of RMSSD Time",
  y_max = 100
)

p2 <- rmssd_summary_boxplot(
  rmssd_variability_Control_Patients_5min, "sd_RMSSD_Magnitude",
  title = "Standard Deviation of RMSSD Amplitude for each ID by Epoch Type",
  y_label = "SD of RMSSD Magnitude",
  y_max = 250
)

p3 <- rmssd_summary_boxplot(
  rmssd_variability_Control_Patients_5min, "median_RMSSD_Time",
  title = "Median of RMSSD Time for each ID by Epoch Type",
  y_label = "Median of RMSSD Time",
  y_max = 100
)

p4 <- rmssd_summary_boxplot(
  rmssd_variability_Control_Patients_5min, "median_RMSSD_Magnitude",
  title = "Median of RMSSD Amplitude for each ID by Epoch Type",
  y_label = "Median of RMSSD Magnitude",
  y_max = 250
)

# 2 x 2 layout: SD panels on the top row, median panels below.
grid.arrange(p1, p2, p3, p4, ncol = 2)

# Settings for the comparison table: tests and a Total column are requested,
# numeric variables use the "kwt" test and categorical ones "chisq".
mycontrols <- tableby.control(
  test = TRUE,
  total = TRUE,
  numeric.test = "kwt",
  cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "meansd", "medianq1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    meansd = "Mean (SD)",
    # medianq1q3 = "Median (Q1, Q3)",
    # range = "Min - Max",
    quantile5 = "5th Percentile",
    quantile95 = "95th Percentile",
    Nmiss2 = "Missing"
  )
)

# Compare the per-ID RMSSD summaries across the three epoch types.
tab.test <- tableby(
  epoch_type ~
    mean_RMSSD_Time + median_RMSSD_Time + sd_RMSSD_Time +
    mean_RMSSD_Magnitude + median_RMSSD_Magnitude + sd_RMSSD_Magnitude,
  data = rmssd_variability_Control_Patients_5min,
  control = mycontrols
)

summary(tab.test)
fast epoch (N=83) medium epoch (N=87) slow epoch (N=90) Total (N=260) p value
mean_RMSSD_Time < 0.001
   N 83 87 90 260
   Mean (SD) 51.261 (4.429) 64.353 (7.868) 79.844 (9.061) 65.536 (13.852)
   Median (Q1, Q3) 50.526 (47.764, 53.527) 62.390 (59.435, 67.901) 80.136 (73.471, 84.824) 63.500 (53.541, 76.360)
   Min 44.497 52.110 52.200 44.497
   Max 63.850 96.330 107.933 107.933
median_RMSSD_Time < 0.001
   N 83 87 90 260
   Mean (SD) 50.465 (4.285) 63.418 (7.848) 78.458 (9.098) 64.489 (13.636)
   Median (Q1, Q3) 49.280 (47.425, 53.165) 61.950 (58.100, 65.892) 77.975 (71.345, 84.540) 62.400 (53.095, 73.833)
   Min 43.860 52.110 52.200 43.860
   Max 63.850 96.330 101.360 101.360
sd_RMSSD_Time < 0.001
   N 72 69 81 222
   N-Miss 11 18 9 38
   Mean (SD) 7.024 (4.858) 9.032 (5.837) 14.478 (8.132) 10.368 (7.238)
   Median (Q1, Q3) 6.227 (4.241, 7.857) 7.976 (4.647, 12.864) 12.595 (8.243, 19.333) 8.303 (5.112, 13.737)
   Min 0.134 0.000 2.539 0.000
   Max 26.168 27.165 40.985 40.985
mean_RMSSD_Magnitude < 0.001
   N 83 87 90 260
   Mean (SD) 70.625 (28.163) 59.799 (24.943) 46.434 (26.284) 58.628 (28.166)
   Median (Q1, Q3) 67.335 (50.524, 83.942) 53.579 (41.868, 68.798) 37.122 (29.681, 56.516) 52.975 (38.541, 72.248)
   Min 20.160 28.345 19.610 19.610
   Max 183.573 149.150 180.510 183.573
median_RMSSD_Magnitude < 0.001
   N 83 87 90 260
   Mean (SD) 65.556 (24.956) 56.492 (23.552) 43.154 (26.142) 54.769 (26.484)
   Median (Q1, Q3) 61.095 (47.792, 75.675) 49.800 (40.300, 63.960) 35.885 (26.372, 49.825) 48.633 (35.900, 65.963)
   Min 20.160 28.345 20.100 20.100
   Max 157.550 149.150 180.510 180.510
sd_RMSSD_Magnitude 0.004
   N 72 69 81 222
   N-Miss 11 18 9 38
   Mean (SD) 30.410 (30.947) 24.217 (18.916) 22.499 (22.217) 25.599 (24.660)
   Median (Q1, Q3) 24.288 (15.183, 34.176) 18.350 (12.501, 28.706) 13.988 (9.279, 25.430) 18.105 (11.634, 30.590)
   Min 0.021 0.225 1.344 0.021
   Max 230.546 94.753 107.742 230.546
# 5th and 95th percentiles of the per-ID RMSSD summaries within each epoch
# type, rendered as an HTML table. Missing SDs are ignored (na.rm = TRUE).
rmssd_variability_Control_Patients_5min %>%
  group_by(epoch_type) %>%
  summarise(
    across(
      c(sd_RMSSD_Time, median_RMSSD_Time, mean_RMSSD_Time),
      ~ quantile(.x, 0.05, na.rm = TRUE),
      .names = "p5_{.col}"
    ),
    across(
      c(sd_RMSSD_Time, median_RMSSD_Time, mean_RMSSD_Time),
      ~ quantile(.x, 0.95, na.rm = TRUE),
      .names = "p95_{.col}"
    ),
    across(
      c(sd_RMSSD_Magnitude, median_RMSSD_Magnitude, mean_RMSSD_Magnitude),
      ~ quantile(.x, 0.05, na.rm = TRUE),
      .names = "p5_{.col}"
    ),
    across(
      c(sd_RMSSD_Magnitude, median_RMSSD_Magnitude, mean_RMSSD_Magnitude),
      ~ quantile(.x, 0.95, na.rm = TRUE),
      .names = "p95_{.col}"
    )
  ) %>%
  knitr::kable(format = "html")
epoch_type p5_sd_RMSSD_Time p5_median_RMSSD_Time p5_mean_RMSSD_Time p95_sd_RMSSD_Time p95_median_RMSSD_Time p95_mean_RMSSD_Time p5_sd_RMSSD_Magnitude p5_median_RMSSD_Magnitude p5_mean_RMSSD_Magnitude p95_sd_RMSSD_Magnitude p95_median_RMSSD_Magnitude p95_mean_RMSSD_Magnitude
fast epoch 1.914226 45.3435 45.95040 15.41039 58.4450 59.81925 8.438141 33.011 33.89300 73.84025 112.14500 116.18890
medium epoch 1.370373 55.7390 56.27400 18.83540 78.8380 80.97400 4.124470 33.923 34.58900 61.87619 97.04500 106.19450
slow epoch 4.582274 66.0525 67.42725 29.73492 93.4595 92.70525 3.681226 21.536 23.35412 65.67032 92.99625 93.11925
# Per-ID boxplots of epoch-level RMSSD Time for a single epoch type.
# epoch_label : value of epoch_type to keep (e.g. "fast epoch").
# plot_title  : title shown above the panel.
# The x axis uses combined_ID pasted with the epoch type as the box label.
control_rmssd_time_by_id <- function(epoch_label, plot_title) {
  Control_Patients_5min %>%
    filter(epoch_type == .env$epoch_label) %>%
    mutate(new_ID = paste0(combined_ID, epoch_type)) %>%
    ggplot(aes(x = factor(new_ID), y = epoch_RMSSD_Time)) +
    geom_boxplot() +
    labs(title = plot_title, x = "ID", y = "RMSSD Time") +
    ylim(0, 200) +
    theme_minimal()
}

p1 <- control_rmssd_time_by_id("fast epoch", "RMSSD Time Distribution by Fast Epoch")
p2 <- control_rmssd_time_by_id("medium epoch", "RMSSD Time Distribution by Medium Epoch")
p3 <- control_rmssd_time_by_id("slow epoch", "RMSSD Time Distribution by Slow Epoch")

# Fast, medium and slow panels side by side.
grid.arrange(p1, p2, p3, ncol = 3)

# Per-ID boxplots of epoch-level RMSSD Magnitude for a single epoch type.
# epoch_label : value of epoch_type to keep (e.g. "fast epoch").
# plot_title  : title shown above the panel.
# The x axis uses combined_ID pasted with the epoch type as the box label.
control_rmssd_magnitude_by_id <- function(epoch_label, plot_title) {
  Control_Patients_5min %>%
    filter(epoch_type == .env$epoch_label) %>%
    mutate(new_ID = paste0(combined_ID, epoch_type)) %>%
    ggplot(aes(x = factor(new_ID), y = epoch_RMSSD_Magnitude)) +
    geom_boxplot() +
    labs(title = plot_title, x = "ID", y = "RMSSD Magnitude") +
    ylim(0, 400) +
    theme_minimal()
}

p1 <- control_rmssd_magnitude_by_id("fast epoch", "RMSSD Magnitude Distribution by Fast Epoch")
p2 <- control_rmssd_magnitude_by_id("medium epoch", "RMSSD Magnitude Distribution by Medium Epoch")
p3 <- control_rmssd_magnitude_by_id("slow epoch", "RMSSD Magnitude Distribution by Slow Epoch")

# Fast, medium and slow panels side by side.
grid.arrange(p1, p2, p3, ncol = 3)

1.2 Describe how the epochs are distributed over time

Question: Are they more in the proximal or distal small bowel?

# Histogram of time_fraction for the epochs of one type.
# epoch_label : value of epoch_type to keep.
# x_label     : x-axis title (blank padding is used for the outer panels so
#               that only the middle panel carries the shared axis title).
# y_label     : y-axis title.
control_time_fraction_histogram <- function(epoch_label, x_label, y_label) {
  Control_Patients_5min %>%
    filter(epoch_type == .env$epoch_label) %>%
    ggplot(aes(x = time_fraction)) +
    geom_histogram(fill = "steelblue", color = "white") +
    labs(x = x_label, y = y_label) +
    ylim(0, 35) +
    theme(
      text = element_text(size = 14),
      axis.text.x = element_text(size = 14),
      axis.text.y = element_text(size = 14),
      axis.title.x = element_text(size = 18),
      axis.title.y = element_text(size = 18),
      plot.title = element_text(size = 16)
    )
}

p1 <- control_time_fraction_histogram(
  "fast epoch",
  x_label = "        ",
  y_label = "Fast Rhythmic Intervals"
)

p2 <- control_time_fraction_histogram(
  "medium epoch",
  x_label = "Time Fraction since Gastric Emptying Time",
  y_label = "Medium Rhythmic Intervals"
)

p3 <- control_time_fraction_histogram(
  "slow epoch",
  x_label = "        ",
  y_label = "Slow  Rhythmic Intervals"
)

# Combined three-panel figure, followed by each panel on its own.
grid.arrange(p1, p2, p3, ncol = 3)

p1

p2

p3

Question: Do they tend to occur together in clumps or separated out?

# Summarise, per subject, the gaps between consecutive epochs of one type.
#
# data             : epoch-level table with epoch_type, combined_ID and
#                    time_from_gastric_emptying_minute.
# epoch_label      : value of epoch_type to analyse (e.g. "fast epoch").
# count_name       : name of the output column holding the epoch count.
# epoch_length_min : epoch duration in minutes, subtracted from the difference
#                    between consecutive epoch times so that back-to-back
#                    epochs have a gap of 0.
#                    NOTE(review): assumes every epoch lasts 5 minutes and the
#                    timestamp marks the same point of each epoch, as the
#                    original fast-epoch code did - confirm.
#
# Returns one row per combined_ID with the epoch count and the mean, median,
# minimum and maximum gap (rounded to 2 decimals). A subject with a single
# epoch has no gap, so all four gap statistics are NA.
#
# Previously only the fast-epoch gap subtracted the epoch length; the medium
# and slow gaps did not, which made the three tables incomparable. Single-epoch
# subjects also produced Inf/-Inf/NaN with warnings.
summarise_epoch_gaps <- function(data, epoch_label, count_name,
                                 epoch_length_min = 5) {
  # Apply `stat` to the non-missing gaps, or return NA when there are none.
  gap_stat <- function(gaps, stat) {
    gaps <- gaps[!is.na(gaps)]
    if (length(gaps) == 0) NA_real_ else round(stat(gaps), 2)
  }

  gap_summary <- data %>%
    filter(
      epoch_type == .env$epoch_label,
      !is.na(time_from_gastric_emptying_minute)
    ) %>%
    arrange(combined_ID, time_from_gastric_emptying_minute) %>%
    group_by(combined_ID) %>%
    mutate(
      # First epoch of each subject has no predecessor, so its gap is NA.
      time_gap = time_from_gastric_emptying_minute -
        dplyr::lag(time_from_gastric_emptying_minute) -
        .env$epoch_length_min
    ) %>%
    summarize(
      n_epochs = n(),
      avg_gap = gap_stat(time_gap, mean),
      median_gap = gap_stat(time_gap, median),
      min_gap = gap_stat(time_gap, min),
      max_gap = gap_stat(time_gap, max),
      .groups = "drop"
    )

  names(gap_summary)[names(gap_summary) == "n_epochs"] <- count_name
  gap_summary
}

fast_epochs <- summarise_epoch_gaps(
  Control_Patients_5min, "fast epoch", "n_fast_epochs"
)
medium_epochs <- summarise_epoch_gaps(
  Control_Patients_5min, "medium epoch", "n_medium_epochs"
)
slow_epochs <- summarise_epoch_gaps(
  Control_Patients_5min, "slow epoch", "n_slow_epochs"
)

1.3 Epoch Type - Time Fraction Plot

# Add the plotting coordinates for the timeline figure.
# time_fraction_end is the epoch start plus 5/60 divided by total_time
# (time_fraction itself is hours since gastric emptying over total_time).
# This is plain row-wise arithmetic, so no grouping is applied; the previous
# group_by(combined_ID) was never undone and left the table grouped for all
# later code.
Control_Patients_5min <- Control_Patients_5min %>%
  mutate(time_fraction_end = time_fraction + 5 / 60 / total_time)
# ggplot(Control_Patients_5min) +
#   geom_segment(aes(x = time_fraction, xend = time_fraction_end, 
#                    y = as.character(combined_ID), yend = as.character(combined_ID), color = epoch_type)) +
#   labs(x = "Time Fraction", y = "ID", color = "Epoch Type") +
#   theme_minimal() +
#   theme(axis.text.y = element_text(size = 7))
# ID is a dense 1..k index over the sorted distinct combined_ID values; each
# subject occupies the band [ID - 0.5, ID + 0.5] on the y axis.
Control_Patients_5min <- Control_Patients_5min %>%
  mutate(
    ID = as.integer(factor(combined_ID)),
    ymin = ID - 0.5,
    ymax = ID + 0.5
  )

# Timeline figure: one horizontal band per participant, with a filled
# rectangle for every epoch coloured by its type, and a thin black line along
# the lower edge of each band. Epoch labels are shortened for the legend; any
# other epoch_type value becomes NA.
Control_Patients_5min %>%
  mutate(
    epoch_type = case_when(
      epoch_type == "fast epoch" ~ "fast",
      epoch_type == "medium epoch" ~ "medium",
      epoch_type == "slow epoch" ~ "slow"
    )
  ) %>%
  ggplot() +
  geom_rect(
    aes(
      xmin = time_fraction, xmax = time_fraction_end,
      ymin = ymin, ymax = ymax,
      fill = epoch_type
    )
  ) +
  # Separator spanning the full width of the panel under each band.
  geom_segment(
    aes(x = -Inf, xend = Inf, y = ymin, yend = ymin),
    color = "black", linewidth = 0.02
  ) +
  scale_y_continuous(
    breaks = Control_Patients_5min$ID,
    labels = Control_Patients_5min$combined_ID
  ) +
  labs(
    x = "Time Fraction since Gastric Emptying Time",
    y = "Participant ID",
    fill = "Type of Rhythmic Intervals"
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    # Participant labels are hidden; only the axis title is shown.
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    plot.title = element_text(size = 16)
  )

1.4 Is Total Time Correlated to Number of Epochs?

library(corrplot)
# Spearman correlations between recording time and epoch counts.
# The distinct combinations of subject ID and the six summary columns are
# extracted once (the previous code recomputed the same select/unique six
# times, once per column). ungroup() makes the result independent of any
# grouping left on the table.
per_subject_totals <- Control_Patients_5min %>%
  ungroup() %>%
  select(
    combined_ID,
    total_time, total_no_missing_time,
    number_of_total_epochs,
    number_of_fast_epochs, number_of_medium_epochs, number_of_slow_epochs
  ) %>%
  distinct()

# combined_ID only identifies the rows and is excluded from the correlation;
# rows with any missing value are dropped (use = "complete.obs").
res <- cor(
  per_subject_totals %>% select(-combined_ID),
  use = "complete.obs",
  method = "spearman"
)
# res
corrplot(res, type = "upper",
         tl.col = "black", tl.srt = 45)

2 EVA Data Analyses

# Load the 5-minute epoch table for the EVA patients and derive, in one pass,
# the per-subject epoch counts/frequencies and the per-epoch time and motility
# variables used in the rest of section 2.
EVA_Patients_5min <- read.csv(
  "C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Verified Dataset/EVA_Patients_5min.csv"
) %>%
  filter(!is.na(epoch_type)) %>%
  # Per-subject counts of each epoch type (computed before the window and
  # prefiltration filters below, as in the original order of operations).
  group_by(combined_ID) %>%
  mutate(
    number_of_fast_epochs = sum(epoch_type == "fast epoch"),
    number_of_medium_epochs = sum(epoch_type == "medium epoch"),
    number_of_slow_epochs = sum(epoch_type == "slow epoch")
  ) %>%
  ungroup() %>%
  mutate(
    number_of_total_epochs = rowSums(across(c(
      number_of_fast_epochs,
      number_of_medium_epochs,
      number_of_slow_epochs
    ))),
    # Counts divided by total_no_missing_time.
    number_of_fast_epochs_frequency = number_of_fast_epochs / total_no_missing_time,
    number_of_medium_epochs_frequency = number_of_medium_epochs / total_no_missing_time,
    number_of_slow_epochs_frequency = number_of_slow_epochs / total_no_missing_time,
    number_of_total_epochs_frequency = number_of_total_epochs / total_no_missing_time
  ) %>%
  # epoch_type is already free of NA here, so only the two remaining
  # conditions need to be applied.
  filter(full_window_size == "Yes", prefiltration == FALSE) %>%
  mutate(
    time_from_swallow_hour = time_from_swallow_minute / 60,
    time_from_gastric_emptying_hour = time_from_gastric_emptying_minute / 60,
    time_fraction = time_from_gastric_emptying_hour / total_time
  ) %>%
  rename(
    sum_of_amplitude = total_pressure_exposure,
    imputed_sum_of_amplitude = imputed_total_pressure_exposure
  ) %>%
  mutate(
    motility_index = log(sum_of_amplitude * total_number_of_contractions + 1)
  ) %>%
  filter(combined_ID != 7180) # Missing Info

2.1 Describe within-subject RMSSD variability for EVA Data

Question: Does it seem true that for fast epochs RMSSD is more consistent?

Answer: Yes, RMSSD appears to be more consistent within fast epochs, especially RMSSD Time.

# One row per (subject, epoch type): mean, median and SD of the epoch-level
# RMSSD Time and RMSSD Magnitude, plus the number of epochs contributing.
# SD is NA when a subject has a single epoch of that type.
rmssd_variability_EVA_Patients_5min <- EVA_Patients_5min %>%
  group_by(combined_ID, epoch_type) %>%
  summarise(
    # Produces mean_/median_/sd_ columns named after the source column with
    # its "epoch_" prefix removed (e.g. mean_RMSSD_Time).
    across(
      c(epoch_RMSSD_Time, epoch_RMSSD_Magnitude),
      list(mean = mean, median = median, sd = sd),
      .names = "{.fn}_{sub('^epoch_', '', .col)}"
    ),
    n_epochs = n(),
    .groups = "drop"
  )
# Build one EVA summary panel: a boxplot (outlier glyphs hidden) overlaid with
# jittered per-ID points of the column `y_col`, split by epoch type.
# y_col   : name (string) of the summary column plotted on the y axis.
# title   : plot title.
# y_label : y-axis label.
# y_max   : upper y limit (the lower limit is 0).
eva_rmssd_summary_boxplot <- function(y_col, title, y_label, y_max) {
  ggplot(
    rmssd_variability_EVA_Patients_5min,
    aes(x = epoch_type, y = .data[[y_col]])
  ) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    labs(title = title, x = "Epoch Type", y = y_label) +
    ylim(0, y_max) +
    theme(
      text = element_text(size = 14),
      axis.text.x = element_text(size = 14),
      axis.text.y = element_text(size = 14),
      axis.title.x = element_text(size = 18),
      axis.title.y = element_text(size = 18),
      plot.title = element_text(size = 16)
    )
}

p1 <- eva_rmssd_summary_boxplot(
  "sd_RMSSD_Time",
  title = "Standard Deviation of RMSSD Time for each ID by Epoch Type",
  y_label = "SD of RMSSD Time",
  y_max = 140
)

p2 <- eva_rmssd_summary_boxplot(
  "sd_RMSSD_Magnitude",
  title = "Standard Deviation of RMSSD Amplitude for each ID by Epoch Type",
  y_label = "SD of RMSSD Magnitude",
  y_max = 200
)

p3 <- eva_rmssd_summary_boxplot(
  "median_RMSSD_Time",
  title = "Median of RMSSD Time for each ID by Epoch Type",
  y_label = "Median of RMSSD Time",
  y_max = 140
)

p4 <- eva_rmssd_summary_boxplot(
  "median_RMSSD_Magnitude",
  title = "Median of RMSSD Amplitude for each ID by Epoch Type",
  y_label = "Median of RMSSD Magnitude",
  y_max = 200
)

# 2 x 2 layout: SD panels on the top row, median panels below.
grid.arrange(p1, p2, p3, p4, ncol = 2)

# Settings for the comparison table: tests and a Total column are requested,
# numeric variables use the "kwt" test and categorical ones "chisq".
mycontrols <- tableby.control(
  test = TRUE,
  total = TRUE,
  numeric.test = "kwt",
  cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "meansd", "medianq1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    meansd = "Mean (SD)",
    # medianq1q3 = "Median (Q1, Q3)",
    # range = "Min - Max",
    quantile5 = "5th Percentile",
    quantile95 = "95th Percentile",
    Nmiss2 = "Missing"
  )
)

# Compare the per-ID RMSSD summaries across the three epoch types.
tab.test <- tableby(
  epoch_type ~
    mean_RMSSD_Time + median_RMSSD_Time + sd_RMSSD_Time +
    mean_RMSSD_Magnitude + median_RMSSD_Magnitude + sd_RMSSD_Magnitude,
  data = rmssd_variability_EVA_Patients_5min,
  control = mycontrols
)

summary(tab.test)
fast epoch (N=61) medium epoch (N=58) slow epoch (N=59) Total (N=178) p value
mean_RMSSD_Time < 0.001
   N 61 58 59 178
   Mean (SD) 54.216 (6.088) 68.275 (10.305) 85.045 (13.828) 69.016 (16.454)
   Median (Q1, Q3) 53.943 (49.080, 57.485) 65.690 (62.031, 72.868) 82.040 (76.702, 91.950) 65.590 (55.995, 77.910)
   Min 43.885 51.220 64.010 43.885
   Max 71.790 101.345 138.230 138.230
median_RMSSD_Time < 0.001
   N 61 58 59 178
   Mean (SD) 53.136 (6.481) 67.132 (9.843) 83.876 (14.217) 67.885 (16.494)
   Median (Q1, Q3) 52.540 (48.160, 57.180) 65.590 (60.888, 70.885) 80.715 (73.755, 89.062) 65.455 (55.622, 76.742)
   Min 41.540 50.480 64.010 41.540
   Max 71.790 101.345 138.230 138.230
sd_RMSSD_Time < 0.001
   N 47 40 44 131
   N-Miss 14 18 15 47
   Mean (SD) 9.304 (5.801) 12.509 (10.274) 15.703 (9.257) 12.432 (8.892)
   Median (Q1, Q3) 8.389 (4.755, 12.623) 10.004 (4.393, 17.119) 13.932 (10.789, 18.330) 10.753 (6.347, 16.092)
   Min 0.530 1.770 0.559 0.530
   Max 25.172 51.456 50.982 51.456
mean_RMSSD_Magnitude < 0.001
   N 61 58 59 178
   Mean (SD) 78.622 (29.029) 63.031 (24.903) 51.258 (25.158) 64.472 (28.647)
   Median (Q1, Q3) 76.396 (62.313, 90.594) 59.629 (46.562, 72.094) 47.182 (37.784, 62.575) 61.278 (43.933, 79.232)
   Min 24.450 27.860 17.460 17.460
   Max 158.900 151.905 177.435 177.435
median_RMSSD_Magnitude < 0.001
   N 61 58 59 178
   Mean (SD) 73.035 (28.061) 60.919 (25.692) 47.339 (24.965) 60.570 (28.207)
   Median (Q1, Q3) 72.370 (54.360, 86.700) 53.445 (42.785, 71.294) 43.950 (34.177, 49.892) 52.252 (40.903, 78.102)
   Min 24.450 27.860 17.460 17.460
   Max 158.900 151.905 177.435 177.435
sd_RMSSD_Magnitude 0.077
   N 47 40 44 131
   N-Miss 14 18 15 47
   Mean (SD) 33.781 (22.912) 26.299 (22.139) 27.737 (24.361) 29.466 (23.236)
   Median (Q1, Q3) 26.073 (17.804, 44.129) 22.894 (11.119, 33.471) 20.582 (11.211, 34.803) 22.947 (13.109, 39.995)
   Min 4.778 3.691 1.683 1.683
   Max 117.665 129.606 98.889 129.606
# 5th and 95th percentiles of the per-ID RMSSD summaries within each epoch
# type, rendered as an HTML table. Missing SDs are ignored (na.rm = TRUE).
rmssd_variability_EVA_Patients_5min %>%
  group_by(epoch_type) %>%
  summarise(
    across(
      c(sd_RMSSD_Time, median_RMSSD_Time, mean_RMSSD_Time),
      ~ quantile(.x, 0.05, na.rm = TRUE),
      .names = "p5_{.col}"
    ),
    across(
      c(sd_RMSSD_Time, median_RMSSD_Time, mean_RMSSD_Time),
      ~ quantile(.x, 0.95, na.rm = TRUE),
      .names = "p95_{.col}"
    ),
    across(
      c(sd_RMSSD_Magnitude, median_RMSSD_Magnitude, mean_RMSSD_Magnitude),
      ~ quantile(.x, 0.05, na.rm = TRUE),
      .names = "p5_{.col}"
    ),
    across(
      c(sd_RMSSD_Magnitude, median_RMSSD_Magnitude, mean_RMSSD_Magnitude),
      ~ quantile(.x, 0.95, na.rm = TRUE),
      .names = "p95_{.col}"
    )
  ) %>%
  knitr::kable(format = "html")
epoch_type p5_sd_RMSSD_Time p5_median_RMSSD_Time p5_mean_RMSSD_Time p95_sd_RMSSD_Time p95_median_RMSSD_Time p95_mean_RMSSD_Time p5_sd_RMSSD_Magnitude p5_median_RMSSD_Magnitude p5_mean_RMSSD_Magnitude p95_sd_RMSSD_Magnitude p95_median_RMSSD_Magnitude p95_mean_RMSSD_Magnitude
fast epoch 2.218913 45.1050 46.38000 20.80387 63.55000 64.08714 7.056163 30.3700 31.5700 75.51322 120.1500 130.9700
medium epoch 2.220669 54.7290 56.17683 30.43147 82.58425 89.73275 5.619383 30.2185 30.2185 53.54059 109.6155 103.1257
slow epoch 4.889204 67.1335 69.06625 32.70255 112.02900 112.02900 2.264742 22.3850 20.9756 77.89687 87.6005 83.2505
# Per-ID boxplots of epoch-level RMSSD Time for a single epoch type.
# epoch_label : value of epoch_type to keep (e.g. "fast epoch").
# plot_title  : title shown above the panel.
# The x axis uses combined_ID pasted with the epoch type as the box label.
eva_rmssd_time_by_id <- function(epoch_label, plot_title) {
  EVA_Patients_5min %>%
    filter(epoch_type == .env$epoch_label) %>%
    mutate(new_ID = paste0(combined_ID, epoch_type)) %>%
    ggplot(aes(x = factor(new_ID), y = epoch_RMSSD_Time)) +
    geom_boxplot() +
    labs(title = plot_title, x = "ID", y = "RMSSD Time") +
    ylim(0, 200) +
    theme_minimal()
}

p1 <- eva_rmssd_time_by_id("fast epoch", "RMSSD Time Distribution by Fast Epoch")
p2 <- eva_rmssd_time_by_id("medium epoch", "RMSSD Time Distribution by Medium Epoch")
p3 <- eva_rmssd_time_by_id("slow epoch", "RMSSD Time Distribution by Slow Epoch")

# Fast, medium and slow panels side by side.
grid.arrange(p1, p2, p3, ncol = 3)

p1 <- ggplot(EVA_Patients_5min %>% mutate(new_ID = paste0(combined_ID, epoch_type)) %>% filter(epoch_type == "fast epoch"), aes(x = factor(new_ID), y = epoch_RMSSD_Magnitude)) +
  geom_boxplot() +
  labs(
    title = "RMSSD Magnitude Distribution by Fast Epoch",
    x = "ID",
    y = "RMSSD Magnitude"
  ) +
  ylim(0, 400) +
  theme_minimal()

p2 <- ggplot(EVA_Patients_5min %>% mutate(new_ID = paste0(combined_ID, epoch_type)) %>% filter(epoch_type == "medium epoch"), aes(x = factor(new_ID), y = epoch_RMSSD_Magnitude)) +
  geom_boxplot() +
  labs(
    title = "RMSSD Magnitude Distribution by Medium Epoch",
    x = "ID",
    y = "RMSSD Magnitude"
  ) +
  ylim(0, 400) +
  theme_minimal()

p3 <- ggplot(EVA_Patients_5min %>% mutate(new_ID = paste0(combined_ID, epoch_type)) %>% filter(epoch_type == "slow epoch"), aes(x = factor(new_ID), y = epoch_RMSSD_Magnitude)) +
  geom_boxplot() +
  labs(
    title = "RMSSD Magnitude Distribution by Slow Epoch",
    x = "ID",
    y = "RMSSD Magnitude"
  ) +
  ylim(0, 400) +
  theme_minimal()

grid.arrange(p1, p2, p3, ncol = 3)

2.2 Describe how the epochs are distributed over time

Question: Are they more in the proximal or distal small bowel?

# Histogram of where along the time-fraction axis the epochs of one type fall.
#
# data        epoch-level records with epoch_type and time_fraction columns
# epoch_label value of epoch_type to keep
# x_label     x-axis label (the outer panels use a blank string so that only
#             the middle panel carries the axis title when shown side by side)
# y_label     y-axis label
#
# bins = 30 is ggplot2's default, written out to silence the "pick better
# value" message. The common 0-35 y range is applied with coord_cartesian()
# instead of ylim(): ylim() removes any bar taller than the limit entirely,
# whereas coord_cartesian() keeps it and merely clips the view.
plot_interval_histogram <- function(data, epoch_label, x_label, y_label) {
  data %>%
    filter(epoch_type == epoch_label) %>%
    ggplot(aes(x = time_fraction)) +
    geom_histogram(bins = 30, fill = "steelblue", color = "white") +
    labs(x = x_label, y = y_label) +
    coord_cartesian(ylim = c(0, 35)) +
    theme(
      text = element_text(size = 14),
      axis.text.x = element_text(size = 14),
      axis.text.y = element_text(size = 14),
      axis.title.x = element_text(size = 18),
      axis.title.y = element_text(size = 18),
      plot.title = element_text(size = 16)
    )
}

p1 <- plot_interval_histogram(
  EVA_Patients_5min, "fast epoch",
  "        ", "Fast Rhythmic Intervals"
)
p2 <- plot_interval_histogram(
  EVA_Patients_5min, "medium epoch",
  "Time Fraction since Gastric Emptying Time", "Medium Rhythmic Intervals"
)
p3 <- plot_interval_histogram(
  EVA_Patients_5min, "slow epoch",
  "        ", "Slow Rhythmic Intervals"
)

# Combined three-panel figure, followed by each panel on its own.
grid.arrange(p1, p2, p3, ncol = 3)

p1

p2

p3

Question: Do they tend to occur together in clumps or separated out?

# Per-participant summary of the time between consecutive epochs of one type.
#
# data          epoch-level records with combined_ID, epoch_type and
#               time_from_gastric_emptying_minute
# epoch_label   value of epoch_type to keep (e.g. "fast epoch")
# count_col     name of the output column holding the number of epochs
# epoch_minutes length of one epoch, subtracted from the start-to-start
#               distance so that back-to-back epochs have a gap of 0
#               (presumably the 5-minute window length - TODO confirm)
#
# Returns one row per combined_ID with the epoch count and the mean, median,
# min and max gap (rounded to 2 decimals).
#
# Two fixes relative to the previous copy-pasted version:
#   * the epoch length is now subtracted for every epoch type; previously only
#     the fast-epoch gaps subtracted 5, so medium/slow gaps were 5 minutes
#     larger by construction and the three tables were not comparable;
#   * participants with a single epoch have no gaps at all; they now get NA
#     instead of NaN / Inf / -Inf (and the accompanying min()/max() warnings).
summarize_epoch_gaps <- function(data, epoch_label, count_col, epoch_minutes = 5) {
  # Apply `stat` to the non-missing gaps; NA when there are none.
  gap_stat <- function(gaps, stat) {
    gaps <- gaps[!is.na(gaps)]
    if (length(gaps) == 0) {
      return(NA_real_)
    }
    round(stat(gaps), 2)
  }

  data %>%
    filter(epoch_type == epoch_label, !is.na(time_from_gastric_emptying_minute)) %>%
    arrange(combined_ID, time_from_gastric_emptying_minute) %>%
    group_by(combined_ID) %>%
    mutate(
      # lag() is NA for each participant's first epoch, so that row has no gap
      time_gap = time_from_gastric_emptying_minute -
        lag(time_from_gastric_emptying_minute) - epoch_minutes
    ) %>%
    summarize(
      "{count_col}" := n(),
      avg_gap = gap_stat(time_gap, mean),
      median_gap = gap_stat(time_gap, median),
      min_gap = gap_stat(time_gap, min),
      max_gap = gap_stat(time_gap, max),
      .groups = "drop"
    )
}

fast_epochs <- summarize_epoch_gaps(EVA_Patients_5min, "fast epoch", "n_fast_epochs")
medium_epochs <- summarize_epoch_gaps(EVA_Patients_5min, "medium epoch", "n_medium_epochs")
slow_epochs <- summarize_epoch_gaps(EVA_Patients_5min, "slow epoch", "n_slow_epochs")

2.3 Epoch Type - Time Fraction Plot

# Right-hand edge of each epoch on the time-fraction axis: the start position
# plus 5/60 divided by total_time (presumably a 5-minute epoch expressed as a
# share of total_time in hours - TODO confirm units).
# NOTE(review): no ungroup() follows, so the data frame stays grouped by
# combined_ID for everything downstream - confirm that is intended.
EVA_Patients_5min <- mutate(
  group_by(EVA_Patients_5min, combined_ID),
  time_fraction_end = time_fraction + 5 / 60 / total_time
)

# Earlier segment-based version of the plot, kept for reference:
# ggplot(EVA_Patients_5min) +
#   geom_segment(aes(x = time_fraction, xend = time_fraction_end,
#                    y = as.character(combined_ID), yend = as.character(combined_ID), color = epoch_type)) +
#   labs(x = "Time Fraction", y = "ID", color = "Epoch Type") +
#   theme_minimal() +
#   theme(axis.text.y = element_text(size = 7))

# Give every participant an integer row index (order of the sorted IDs) and a
# band of height 1 centred on it, so each participant is one horizontal strip.
EVA_Patients_5min$ID <- as.integer(factor(EVA_Patients_5min$combined_ID))
EVA_Patients_5min <- EVA_Patients_5min %>%
  mutate(
    ymin = ID - 0.5,
    ymax = ID + 0.5
  )

# One rectangle per epoch, coloured by epoch type, with a thin black rule at
# the bottom of every participant's strip. The y tick labels are blanked in
# theme(), so the participant IDs supplied to the scale are not displayed.
EVA_Patients_5min %>%
  mutate(
    # Shorten the legend entries ("fast epoch" -> "fast", ...)
    epoch_type = case_when(
      epoch_type == "fast epoch" ~ "fast",
      epoch_type == "medium epoch" ~ "medium",
      epoch_type == "slow epoch" ~ "slow"
    )
  ) %>%
  ggplot() +
  geom_rect(
    aes(
      xmin = time_fraction, xmax = time_fraction_end,
      ymin = ymin, ymax = ymax,
      fill = epoch_type
    )
  ) +
  geom_segment(
    aes(x = -Inf, xend = Inf, y = ymin, yend = ymin),
    color = "black", linewidth = 0.02
  ) +
  scale_y_continuous(
    breaks = EVA_Patients_5min$ID,
    labels = EVA_Patients_5min$combined_ID
  ) +
  labs(
    x = "Time Fraction since Gastric Emptying Time",
    y = "Participant ID",
    fill = "Type of Rhythmic Intervals"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    plot.title = element_text(size = 16)
  )

2.4 Is Total Time Correlated to Number of Epochs?

library(corrplot)

# One row per participant with the recording-time totals and epoch counts.
# These columns are repeated on every epoch row, so unique() collapses them.
# Built once here instead of re-running the same select/unique pipeline for
# each of the six columns.
per_subject_totals <- EVA_Patients_5min %>%
  select(
    combined_ID, total_time, total_no_missing_time, number_of_total_epochs,
    number_of_fast_epochs, number_of_medium_epochs, number_of_slow_epochs
  ) %>%
  unique()

# Spearman rank correlations between the six numeric columns; rows with any
# missing value are dropped (use = "complete.obs"). ungroup() is needed so the
# ID column can be removed if the data frame is grouped by combined_ID.
res <- cor(
  per_subject_totals %>%
    ungroup() %>%
    select(-combined_ID) %>%
    as.data.frame(),
  use = "complete.obs", method = "spearman"
)
# res
corrplot(res, type = "upper",
         tl.col = "black", tl.srt = 45)

3 EVA Controls Analyses

# Load the epoch-level records for the EVA control cohort.
# NOTE(review): absolute, user-specific path - the script only runs on a
# machine with this exact directory layout.
EVA_Controls_5min <- read.csv("C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Verified Dataset/EVA_Controls_5min.csv")

# Derive the per-participant epoch counts/frequencies, restrict to usable
# windows, and add the time and motility columns used by the later sections.
# NOTE(review): the epoch counts are computed BEFORE the full_window_size /
# prefiltration filters, so they include epochs that are dropped a few steps
# later - confirm this is intended.
EVA_Controls_5min <- EVA_Controls_5min %>%
  filter(!is.na(epoch_type)) %>%
  # Count each epoch type within a participant (value repeated on every row)
  group_by(combined_ID) %>%
  mutate(
    number_of_fast_epochs = sum(epoch_type == "fast epoch"),
    number_of_medium_epochs = sum(epoch_type == "medium epoch"),
    number_of_slow_epochs = sum(epoch_type == "slow epoch")
  ) %>%
  ungroup() %>%
  # Total count, then every count divided by total_no_missing_time
  mutate(
    number_of_total_epochs = rowSums(across(c(
      number_of_fast_epochs,
      number_of_medium_epochs,
      number_of_slow_epochs
    ))),
    number_of_fast_epochs_frequency = number_of_fast_epochs / total_no_missing_time,
    number_of_medium_epochs_frequency = number_of_medium_epochs / total_no_missing_time,
    number_of_slow_epochs_frequency = number_of_slow_epochs / total_no_missing_time,
    number_of_total_epochs_frequency = number_of_total_epochs / total_no_missing_time
  ) %>%
  # Keep rows flagged as full windows, with an epoch type, and not prefiltered
  filter(
    full_window_size == "Yes",
    !is.na(epoch_type),
    prefiltration == FALSE
  ) %>%
  # Minutes -> hours, then position of the epoch as a fraction of total_time
  mutate(
    time_from_swallow_hour = time_from_swallow_minute / 60,
    time_from_gastric_emptying_hour = time_from_gastric_emptying_minute / 60,
    time_fraction = time_from_gastric_emptying_hour / total_time
  ) %>%
  rename(
    sum_of_amplitude = total_pressure_exposure,
    imputed_sum_of_amplitude = imputed_total_pressure_exposure
  ) %>%
  # Motility index: log of (amplitude sum x number of contractions + 1)
  mutate(motility_index = log(sum_of_amplitude * total_number_of_contractions + 1))

3.1 Describe within-subject RMSSD variability for Control Data

Question: Does it seem true that for fast epochs RMSSD is more consistent?

Answer: Yes, RMSSD appears to be more consistent for faster epochs, especially RMSSD Time.

# Within-participant RMSSD summary for the controls: one row per
# (combined_ID, epoch_type) holding the mean, median and SD of the epoch-level
# RMSSD Time and RMSSD Magnitude plus the number of epochs. sd() is NA when a
# participant has only one epoch of a given type.
# The .names spec strips the "epoch_" prefix, giving mean_RMSSD_Time,
# median_RMSSD_Time, sd_RMSSD_Time, mean_RMSSD_Magnitude, ... in that order.
rmssd_variability_EVA_Controls_5min <- EVA_Controls_5min %>%
  group_by(combined_ID, epoch_type) %>%
  summarize(
    across(
      c(epoch_RMSSD_Time, epoch_RMSSD_Magnitude),
      list(mean = mean, median = median, sd = sd),
      .names = "{.fn}_{sub('epoch_', '', .col)}"
    ),
    n_epochs = n(),
    .groups = "drop"
  )
# Box plot (outliers hidden) with the individual participants jittered on top,
# comparing one per-participant RMSSD summary across epoch types.
#
# y_var      name (string) of the summary column to plot
# plot_title panel title
# y_label    y-axis label
plot_rmssd_summary_by_epoch <- function(y_var, plot_title, y_label) {
  ggplot(
    rmssd_variability_EVA_Controls_5min,
    aes(x = epoch_type, y = .data[[y_var]])
  ) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.5) +
    labs(title = plot_title, x = "Epoch Type", y = y_label) +
    theme_minimal()
}

# Top row: within-participant spread. Bottom row: within-participant level.
p1 <- plot_rmssd_summary_by_epoch(
  "sd_RMSSD_Time",
  "Standard Deviation of RMSSD Time for each ID by Epoch Type",
  "SD of RMSSD Time"
)
p2 <- plot_rmssd_summary_by_epoch(
  "sd_RMSSD_Magnitude",
  "Standard Deviation of RMSSD Magnitude for each ID by Epoch Type",
  "SD of RMSSD Magnitude"
)
p3 <- plot_rmssd_summary_by_epoch(
  "median_RMSSD_Time",
  "Median of RMSSD Time for each ID by Epoch Type",
  "Median of RMSSD Time"
)
p4 <- plot_rmssd_summary_by_epoch(
  "median_RMSSD_Magnitude",
  "Median of RMSSD Magnitude for each ID by Epoch Type",
  "Median of RMSSD Magnitude"
)

grid.arrange(p1, p2, p3, p4, ncol = 2)

# Options for the descriptive table: show a Total column and a p value per
# variable ("kwt" for numeric variables, "chisq" for categorical ones), and
# report N, missing count, mean (SD), median (Q1, Q3), min and max.
# NOTE(review): the quantile5 / quantile95 / Nmiss2 labels below refer to
# statistics that are not requested in numeric.stats, so they look unused.
mycontrols <- tableby.control(
  test = TRUE,
  total = TRUE,
  numeric.test = "kwt",
  cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "meansd", "medianq1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    meansd = "Mean (SD)",
    # medianq1q3 = "Median (Q1, Q3)",
    # range = "Min - Max",
    quantile5 = "5th Percentile",
    quantile95 = "95th Percentile",
    Nmiss2 = "Missing"
  )
)

# Compare the per-participant RMSSD summaries across the three epoch types.
tab.test <- tableby(
  epoch_type ~
    mean_RMSSD_Time + median_RMSSD_Time + sd_RMSSD_Time +
    mean_RMSSD_Magnitude + median_RMSSD_Magnitude + sd_RMSSD_Magnitude,
  data = rmssd_variability_EVA_Controls_5min,
  control = mycontrols
)

summary(tab.test)
fast epoch (N=11) medium epoch (N=8) slow epoch (N=10) Total (N=29) p value
mean_RMSSD_Time < 0.001
   N 11 8 10 29
   Mean (SD) 53.880 (5.451) 61.980 (7.329) 89.182 (19.362) 68.288 (19.834)
   Median (Q1, Q3) 53.760 (49.975, 57.568) 63.686 (57.229, 66.366) 84.019 (80.527, 95.988) 63.740 (54.920, 80.514)
   Min 46.780 49.140 68.541 46.780
   Max 64.695 72.147 135.417 135.417
median_RMSSD_Time < 0.001
   N 11 8 10 29
   Mean (SD) 52.868 (6.292) 61.939 (7.267) 83.365 (15.330) 65.887 (16.824)
   Median (Q1, Q3) 53.760 (48.420, 56.660) 63.678 (57.229, 66.366) 80.938 (73.425, 86.995) 63.740 (54.920, 71.835)
   Min 42.750 49.140 65.970 42.750
   Max 64.695 71.835 116.840 116.840
sd_RMSSD_Time 0.123
   N 7 6 9 22
   N-Miss 4 2 1 7
   Mean (SD) 11.714 (5.622) 8.811 (4.061) 26.624 (20.648) 17.022 (15.606)
   Median (Q1, Q3) 12.554 (7.798, 16.008) 8.637 (6.642, 10.105) 20.541 (8.871, 38.521) 11.506 (7.605, 17.869)
   Min 3.665 3.550 6.433 3.550
   Max 18.166 15.473 65.530 65.530
mean_RMSSD_Magnitude 0.041
   N 11 8 10 29
   Mean (SD) 96.743 (51.984) 55.823 (20.957) 51.975 (23.124) 70.017 (41.246)
   Median (Q1, Q3) 83.154 (50.300, 141.909) 53.901 (38.620, 71.942) 45.989 (35.049, 58.565) 53.913 (41.480, 83.154)
   Min 42.564 28.360 27.810 27.810
   Max 189.670 83.405 101.350 189.670
median_RMSSD_Magnitude 0.071
   N 11 8 10 29
   Mean (SD) 84.860 (48.989) 55.849 (21.074) 49.098 (24.307) 64.525 (37.776)
   Median (Q1, Q3) 69.730 (50.300, 107.558) 53.620 (38.620, 72.134) 41.740 (35.720, 53.392) 52.460 (38.900, 72.360)
   Min 36.570 28.360 28.680 28.360
   Max 189.670 83.405 112.800 189.670
sd_RMSSD_Magnitude 0.461
   N 7 6 9 22
   N-Miss 4 2 1 7
   Mean (SD) 49.549 (52.164) 20.166 (14.553) 20.638 (17.663) 29.709 (33.752)
   Median (Q1, Q3) 37.371 (12.714, 60.335) 18.262 (8.720, 27.837) 16.352 (11.450, 19.040) 17.468 (10.933, 35.443)
   Min 8.980 4.936 7.005 4.936
   Max 154.397 42.971 65.932 154.397
# 5th and 95th percentiles (across participants, within each epoch type) of
# the per-participant SD, median and mean of RMSSD Time and RMSSD Magnitude,
# rendered as an HTML table. Missing summaries (e.g. SD for a participant with
# a single epoch) are ignored via na.rm = TRUE.
rmssd_variability_EVA_Controls_5min %>%
  group_by(epoch_type) %>%
  summarize(
    # RMSSD Time, lower tail
    p5_sd_RMSSD_Time = quantile(sd_RMSSD_Time, 0.05, na.rm = TRUE),
    p5_median_RMSSD_Time = quantile(median_RMSSD_Time, 0.05, na.rm = TRUE),
    p5_mean_RMSSD_Time = quantile(mean_RMSSD_Time, 0.05, na.rm = TRUE),
    # RMSSD Time, upper tail
    p95_sd_RMSSD_Time = quantile(sd_RMSSD_Time, 0.95, na.rm = TRUE),
    p95_median_RMSSD_Time = quantile(median_RMSSD_Time, 0.95, na.rm = TRUE),
    p95_mean_RMSSD_Time = quantile(mean_RMSSD_Time, 0.95, na.rm = TRUE),
    # RMSSD Magnitude, lower tail
    p5_sd_RMSSD_Magnitude = quantile(sd_RMSSD_Magnitude, 0.05, na.rm = TRUE),
    p5_median_RMSSD_Magnitude = quantile(median_RMSSD_Magnitude, 0.05, na.rm = TRUE),
    p5_mean_RMSSD_Magnitude = quantile(mean_RMSSD_Magnitude, 0.05, na.rm = TRUE),
    # RMSSD Magnitude, upper tail
    p95_sd_RMSSD_Magnitude = quantile(sd_RMSSD_Magnitude, 0.95, na.rm = TRUE),
    p95_median_RMSSD_Magnitude = quantile(median_RMSSD_Magnitude, 0.95, na.rm = TRUE),
    p95_mean_RMSSD_Magnitude = quantile(mean_RMSSD_Magnitude, 0.95, na.rm = TRUE)
  ) %>%
  knitr::kable(format = "html")
epoch_type p5_sd_RMSSD_Time p5_median_RMSSD_Time p5_mean_RMSSD_Time p95_sd_RMSSD_Time p95_median_RMSSD_Time p95_mean_RMSSD_Time p5_sd_RMSSD_Magnitude p5_median_RMSSD_Magnitude p5_mean_RMSSD_Magnitude p95_sd_RMSSD_Magnitude p95_median_RMSSD_Magnitude p95_mean_RMSSD_Magnitude
fast epoch 4.137823 44.62000 47.08500 17.80919 61.5675 61.57000 9.257187 41.2700 43.69814 131.27503 169.38000 172.08300
medium epoch 4.190466 51.53925 51.53925 14.21916 70.3475 70.55008 5.428812 31.6570 31.65700 39.64327 83.04975 82.78083
slow epoch 6.818168 66.56400 69.55779 58.76796 108.7580 120.26817 8.507823 29.6565 30.95100 48.68152 87.29400 91.46050
# Per-participant box plots of epoch-level RMSSD, one panel per epoch type.
#
# data        epoch-level records; must contain combined_ID, epoch_type and the
#             column named by y_var
# epoch_label value of epoch_type to keep (e.g. "fast epoch")
# y_var       name (string) of the RMSSD column to plot
# y_max       upper bound of the visible y range
# plot_title  panel title
# y_label     y-axis label
#
# The y range is set with coord_cartesian() rather than ylim(): ylim() discards
# observations outside the limits *before* the box-plot statistics are
# computed, which silently shifts the quartiles/whiskers of any participant
# with values above y_max. coord_cartesian() only zooms the view.
plot_rmssd_by_id <- function(data, epoch_label, y_var, y_max, plot_title, y_label) {
  data %>%
    mutate(new_ID = paste0(combined_ID, epoch_type)) %>%
    filter(epoch_type == epoch_label) %>%
    ggplot(aes(x = factor(new_ID), y = .data[[y_var]])) +
    geom_boxplot() +
    labs(title = plot_title, x = "ID", y = y_label) +
    coord_cartesian(ylim = c(0, y_max)) +
    theme_minimal()
}

# RMSSD Time, EVA controls
p1 <- plot_rmssd_by_id(
  EVA_Controls_5min, "fast epoch", "epoch_RMSSD_Time", 200,
  "RMSSD Time Distribution by Fast Epoch", "RMSSD Time"
)
p2 <- plot_rmssd_by_id(
  EVA_Controls_5min, "medium epoch", "epoch_RMSSD_Time", 200,
  "RMSSD Time Distribution by Medium Epoch", "RMSSD Time"
)
p3 <- plot_rmssd_by_id(
  EVA_Controls_5min, "slow epoch", "epoch_RMSSD_Time", 200,
  "RMSSD Time Distribution by Slow Epoch", "RMSSD Time"
)

grid.arrange(p1, p2, p3, ncol = 3)

# RMSSD Magnitude, EVA controls
p1 <- plot_rmssd_by_id(
  EVA_Controls_5min, "fast epoch", "epoch_RMSSD_Magnitude", 400,
  "RMSSD Magnitude Distribution by Fast Epoch", "RMSSD Magnitude"
)
p2 <- plot_rmssd_by_id(
  EVA_Controls_5min, "medium epoch", "epoch_RMSSD_Magnitude", 400,
  "RMSSD Magnitude Distribution by Medium Epoch", "RMSSD Magnitude"
)
p3 <- plot_rmssd_by_id(
  EVA_Controls_5min, "slow epoch", "epoch_RMSSD_Magnitude", 400,
  "RMSSD Magnitude Distribution by Slow Epoch", "RMSSD Magnitude"
)

grid.arrange(p1, p2, p3, ncol = 3)

3.2 Describe how the epochs are distributed over time

Question: Are they more in the proximal or distal small bowel?

# Histogram of the time-fraction position of the control epochs of one type.
#
# epoch_label value of epoch_type to keep
# plot_title  panel title
plot_epoch_time_histogram <- function(epoch_label, plot_title) {
  EVA_Controls_5min %>%
    filter(epoch_type == epoch_label) %>%
    ggplot(aes(x = time_fraction)) +
    geom_histogram(fill = "steelblue", color = "white") +
    labs(title = plot_title, x = "Time Fraction", y = "Number of Epochs") +
    theme_minimal()
}

p1 <- plot_epoch_time_histogram(
  "fast epoch",
  "Distribution of Fast Epochs Over Time Fraction Since Gastric Emptying"
)
p2 <- plot_epoch_time_histogram(
  "medium epoch",
  "Distribution of Medium Epochs Over Time Fraction Since Gastric Emptying"
)
p3 <- plot_epoch_time_histogram(
  "slow epoch",
  "Distribution of Slow Epochs Over Time Fraction Since Gastric Emptying"
)

# Three panels side by side: fast, medium, slow
grid.arrange(p1, p2, p3, ncol = 3)

Question: Do they tend to occur together in clumps or separated out?

# Per-participant summary of the time between consecutive epochs of one type.
#
# data          epoch-level records with combined_ID, epoch_type and
#               time_from_gastric_emptying_minute
# epoch_label   value of epoch_type to keep (e.g. "fast epoch")
# count_col     name of the output column holding the number of epochs
# epoch_minutes length of one epoch, subtracted from the start-to-start
#               distance so that back-to-back epochs have a gap of 0
#               (presumably the 5-minute window length - TODO confirm)
#
# Returns one row per combined_ID with the epoch count and the mean, median,
# min and max gap (rounded to 2 decimals).
#
# Two fixes relative to the previous copy-pasted version:
#   * the epoch length is now subtracted for every epoch type; previously only
#     the fast-epoch gaps subtracted 5, so medium/slow gaps were 5 minutes
#     larger by construction and the three tables were not comparable;
#   * participants with a single epoch have no gaps at all; they now get NA
#     instead of NaN / Inf / -Inf (and the accompanying min()/max() warnings).
summarize_epoch_gaps <- function(data, epoch_label, count_col, epoch_minutes = 5) {
  # Apply `stat` to the non-missing gaps; NA when there are none.
  gap_stat <- function(gaps, stat) {
    gaps <- gaps[!is.na(gaps)]
    if (length(gaps) == 0) {
      return(NA_real_)
    }
    round(stat(gaps), 2)
  }

  data %>%
    filter(epoch_type == epoch_label, !is.na(time_from_gastric_emptying_minute)) %>%
    arrange(combined_ID, time_from_gastric_emptying_minute) %>%
    group_by(combined_ID) %>%
    mutate(
      # lag() is NA for each participant's first epoch, so that row has no gap
      time_gap = time_from_gastric_emptying_minute -
        lag(time_from_gastric_emptying_minute) - epoch_minutes
    ) %>%
    summarize(
      "{count_col}" := n(),
      avg_gap = gap_stat(time_gap, mean),
      median_gap = gap_stat(time_gap, median),
      min_gap = gap_stat(time_gap, min),
      max_gap = gap_stat(time_gap, max),
      .groups = "drop"
    )
}

fast_epochs <- summarize_epoch_gaps(EVA_Controls_5min, "fast epoch", "n_fast_epochs")
medium_epochs <- summarize_epoch_gaps(EVA_Controls_5min, "medium epoch", "n_medium_epochs")
slow_epochs <- summarize_epoch_gaps(EVA_Controls_5min, "slow epoch", "n_slow_epochs")

3.3 Epoch Type - Time Fraction Plot

# Right-hand edge of each epoch on the time-fraction axis: the start position
# plus 5/60 divided by total_time (a 5-minute epoch as a share of total_time,
# assuming total_time is in hours - TODO confirm units).
# NOTE(review): no ungroup() follows, so the data frame stays grouped by
# combined_ID for everything downstream - confirm that is intended.
EVA_Controls_5min <- mutate(
  group_by(EVA_Controls_5min, combined_ID),
  time_fraction_end = time_fraction + 5 / 60 / total_time
)

# Earlier segment-based version of the plot, kept for reference:
# ggplot(EVA_Controls_5min) +
#   geom_segment(aes(x = time_fraction, xend = time_fraction_end,
#                    y = as.character(combined_ID), yend = as.character(combined_ID), color = epoch_type)) +
#   labs(x = "Time Fraction", y = "ID", color = "Epoch Type") +
#   theme_minimal() +
#   theme(axis.text.y = element_text(size = 7))

# Give every participant an integer row index (order of the sorted IDs) and a
# band of height 1 centred on it, so each participant is one horizontal strip.
EVA_Controls_5min$ID <- as.integer(factor(EVA_Controls_5min$combined_ID))
EVA_Controls_5min <- EVA_Controls_5min %>%
  mutate(
    ymin = ID - 0.5,
    ymax = ID + 0.5
  )

# One rectangle per epoch, coloured by epoch type, with a thin black rule at
# the bottom of every participant's strip. The y tick labels are blanked in
# theme(), so the participant IDs supplied to the scale are not displayed.
EVA_Controls_5min %>%
  mutate(
    # Shorten the legend entries ("fast epoch" -> "fast", ...)
    epoch_type = case_when(
      epoch_type == "fast epoch" ~ "fast",
      epoch_type == "medium epoch" ~ "medium",
      epoch_type == "slow epoch" ~ "slow"
    )
  ) %>%
  ggplot() +
  geom_rect(
    aes(
      xmin = time_fraction, xmax = time_fraction_end,
      ymin = ymin, ymax = ymax,
      fill = epoch_type
    )
  ) +
  geom_segment(
    aes(x = -Inf, xend = Inf, y = ymin, yend = ymin),
    color = "black", linewidth = 0.02
  ) +
  scale_y_continuous(
    breaks = EVA_Controls_5min$ID,
    labels = EVA_Controls_5min$combined_ID
  ) +
  labs(
    x = "Time Fraction since Gastric Emptying Time",
    y = "Participant ID",
    fill = "Type of Rhythmic Intervals"
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 18),
    axis.title.y = element_text(size = 18),
    plot.title = element_text(size = 16)
  )

3.4 Is Total Time Correlated to Number of Epochs?

library(corrplot)

# One row per participant with the recording-time totals and epoch counts.
# These columns are repeated on every epoch row, so unique() collapses them.
# Built once here instead of re-running the same select/unique pipeline for
# each of the six columns.
per_subject_totals <- EVA_Controls_5min %>%
  select(
    combined_ID, total_time, total_no_missing_time, number_of_total_epochs,
    number_of_fast_epochs, number_of_medium_epochs, number_of_slow_epochs
  ) %>%
  unique()

# Spearman rank correlations between the six numeric columns; rows with any
# missing value are dropped (use = "complete.obs"). ungroup() is needed so the
# ID column can be removed if the data frame is grouped by combined_ID.
res <- cor(
  per_subject_totals %>%
    ungroup() %>%
    select(-combined_ID) %>%
    as.data.frame(),
  use = "complete.obs", method = "spearman"
)
# res
corrplot(res, type = "upper",
         tl.col = "black", tl.srt = 45)

# write.csv(fast_epochs, "fast_epochs.csv")
# write.csv(medium_epochs, "medium_epochs.csv")
# write.csv(slow_epochs, "slow_epochs.csv")
# EVA_Patients_5min <- read.csv("C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Dataset/EVA_Patients_5min.csv")
# EVA_Patients_5min <- EVA_Patients_5min %>% filter(full_window_size == "Yes", !is.na(epoch_type))

# library(readxl)
# meals <- read_excel("C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/PatientEvents_summary_04292025.xlsx", sheet = 1)
# meals <- meals %>% filter(event_name == "Ate meal") %>% select(-c(note, event_name))
# meals$patient_id <- as.integer(meals$patient_id)

4 Tables

4.1 Table 1

# Table 1: participant-level comparison of the two cohorts.

# Tag each cohort so it can be used as the grouping variable of the table.
Control_Patients_5min$source <- "Jack's Data"
EVA_Patients_5min$source <- "EVA Patient"
# EVA_Controls_5min$source <- "EVA Controls"

# Stack the cohorts and keep the first row of every participant; the
# variables summarized below are read from that single row.
# NOTE(review): assumes combined_ID values do not collide across the two
# cohorts, otherwise slice(1) would drop a participant - verify.
all_data <- rbind(Control_Patients_5min, EVA_Patients_5min)
all_data <- all_data %>% group_by(combined_ID) %>% slice(1)

# Variables shown in Table 1, in display order. Defined once so that the
# conditioning plots and the table formula cannot drift apart (the list was
# previously typed out twice).
table1_vars <- c(
  "total_time", "total_no_missing_time", "no_missing_time_ratio",
  "total_number_of_contractions", "total_contractions_frequency",
  "sum_of_amplitude", "total_number_of_contractions_over26.7",
  "total_contraction_rate_0", "total_contraction_rate_1_3",
  "total_contraction_rate_4_6", "total_contraction_rate_7_9",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "imputed_total_number_of_contractions", "imputed_sum_of_amplitude",
  "number_of_fast_epochs", "number_of_medium_epochs",
  "number_of_slow_epochs", "number_of_total_epochs",
  "number_of_fast_epochs_frequency", "number_of_medium_epochs_frequency",
  "number_of_slow_epochs_frequency", "number_of_total_epochs_frequency",
  "motility_index"
)

Plot_conditioning(all_data, table1_vars, "source")

# Table options: p values ("kwt" for numeric, "chisq" for categorical
# variables), a Total column, and one line per summary statistic.
# test = TRUE is spelled out; the T shorthand is an ordinary variable that can
# be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = "Count", Nmiss = "Missing Values", mean = "mean", sd = "SD",
    median = "Median", q1q3 = "Q1,Q3", min = "Min", max = "Max"
  )
)

# source ~ total_time + total_no_missing_time + ... + motility_index
tab.test <- tableby(
  reformulate(table1_vars, response = "source"),
  data = all_data,
  control = mycontrols
)

summary(tab.test)
EVA Patient (N=71) Jack’s Data (N=98) Total (N=169) p value
total_time < 0.001
   Count 71 98 169
   mean 5.191 3.762 4.363
   SD 3.099 1.160 2.297
   Median 4.703 3.706 3.951
   Q1,Q3 3.716, 5.750 2.910, 4.409 3.245, 4.973
   Min 1.664 1.312 1.312
   Max 23.128 6.590 23.128
total_no_missing_time 0.226
   Count 71 98 169
   mean 3.393 3.518 3.466
   SD 1.579 1.044 1.293
   Median 3.063 3.428 3.338
   Q1,Q3 2.425, 4.372 2.870, 4.104 2.603, 4.170
   Min 1.010 1.307 1.010
   Max 8.811 6.212 8.811
no_missing_time_ratio < 0.001
   Count 71 98 169
   mean 0.716 0.942 0.847
   SD 0.241 0.084 0.202
   Median 0.766 0.971 0.941
   Q1,Q3 0.613, 0.897 0.939, 0.990 0.788, 0.981
   Min 0.103 0.442 0.103
   Max 0.997 1.000 1.000
total_number_of_contractions 0.012
   Count 71 98 169
   mean 973.676 786.857 865.343
   SD 477.890 392.159 438.755
   Median 885.000 734.000 780.000
   Q1,Q3 615.000, 1239.500 517.500, 951.750 554.000, 1073.000
   Min 252.000 174.000 174.000
   Max 2582.000 2517.000 2582.000
total_contractions_frequency < 0.001
   Count 71 98 169
   mean 312.995 226.842 263.037
   SD 150.872 91.448 127.011
   Median 285.281 218.881 254.833
   Q1,Q3 224.779, 367.898 160.092, 272.262 174.544, 328.726
   Min 118.919 52.572 52.572
   Max 1122.063 433.827 1122.063
sum_of_amplitude 0.051
   Count 71 98 169
   mean 115979.438 100940.428 107258.592
   SD 49744.617 37378.788 43510.713
   Median 106323.294 96916.642 101003.974
   Q1,Q3 82820.367, 148375.073 76487.509, 125008.950 77963.451, 132296.825
   Min 34593.705 31958.930 31958.930
   Max 262383.401 224859.417 262383.401
total_number_of_contractions_over26.7 < 0.001
   Count 71 98 169
   mean 136.352 90.847 109.964
   SD 94.432 68.169 83.103
   Median 115.000 72.000 89.000
   Q1,Q3 59.000, 187.500 41.000, 127.500 47.000, 151.000
   Min 8.000 12.000 8.000
   Max 441.000 338.000 441.000
total_contraction_rate_0 0.493
   Count 71 98 169
   mean 0.237 0.247 0.243
   SD 0.168 0.153 0.159
   Median 0.216 0.222 0.218
   Q1,Q3 0.129, 0.293 0.122, 0.341 0.129, 0.326
   Min 0.012 0.000 0.000
   Max 0.970 0.685 0.970
total_contraction_rate_1_3 0.274
   Count 71 98 169
   mean 0.327 0.344 0.337
   SD 0.087 0.093 0.090
   Median 0.336 0.357 0.343
   Q1,Q3 0.268, 0.389 0.272, 0.411 0.272, 0.401
   Min 0.029 0.110 0.029
   Max 0.488 0.566 0.566
total_contraction_rate_4_6 0.866
   Count 71 98 169
   mean 0.188 0.187 0.188
   SD 0.065 0.071 0.068
   Median 0.189 0.192 0.189
   Q1,Q3 0.137, 0.227 0.133, 0.225 0.133, 0.226
   Min 0.000 0.041 0.000
   Max 0.366 0.364 0.366
total_contraction_rate_7_9 0.836
   Count 71 98 169
   mean 0.130 0.136 0.133
   SD 0.071 0.082 0.077
   Median 0.112 0.115 0.114
   Q1,Q3 0.085, 0.165 0.081, 0.192 0.082, 0.186
   Min 0.000 0.005 0.000
   Max 0.336 0.455 0.455
total_contraction_rate_over10 0.010
   Count 71 98 169
   mean 0.119 0.085 0.099
   SD 0.091 0.071 0.081
   Median 0.106 0.067 0.078
   Q1,Q3 0.048, 0.160 0.030, 0.125 0.038, 0.143
   Min 0.000 0.000 0.000
   Max 0.405 0.283 0.405
number_of_contractions_over26.7_per_hour < 0.001
   Count 71 98 169
   mean 46.195 27.688 35.463
   SD 35.738 23.529 30.590
   Median 38.169 20.876 25.165
   Q1,Q3 20.018, 59.233 11.757, 37.625 15.186, 47.064
   Min 4.460 3.119 3.119
   Max 168.976 155.521 168.976
imputed_total_number_of_contractions < 0.001
   Count 71 98 169
   mean 1561.901 842.918 1144.976
   SD 1075.089 398.189 836.553
   Median 1233.000 788.000 963.000
   Q1,Q3 955.000, 1826.000 577.000, 1006.000 673.000, 1336.000
   Min 332.000 204.000 204.000
   Max 6210.000 2526.000 6210.000
imputed_sum_of_amplitude < 0.001
   Count 71 98 169
   mean 187578.734 108176.988 141535.118
   SD 139333.716 38328.935 102383.712
   Median 158548.129 103445.589 119039.990
   Q1,Q3 115931.473, 200990.440 79719.797, 129270.590 89727.664, 160351.580
   Min 62203.157 34829.877 34829.877
   Max 950697.962 225832.269 950697.962
number_of_fast_epochs 0.361
   Count 71 98 169
   mean 4.113 4.724 4.467
   SD 3.871 4.497 4.244
   Median 3.000 4.000 4.000
   Q1,Q3 1.000, 6.000 1.000, 7.000 1.000, 7.000
   Min 0.000 0.000 0.000
   Max 17.000 27.000 27.000
number_of_medium_epochs 0.013
   Count 71 98 169
   mean 2.592 3.531 3.136
   SD 2.950 2.894 2.946
   Median 2.000 3.000 2.000
   Q1,Q3 1.000, 3.000 1.000, 5.000 1.000, 5.000
   Min 0.000 0.000 0.000
   Max 18.000 13.000 18.000
number_of_slow_epochs 0.006
   Count 71 98 169
   mean 3.437 4.663 4.148
   SD 3.379 3.386 3.427
   Median 3.000 4.000 3.000
   Q1,Q3 1.000, 5.000 2.000, 6.000 1.000, 6.000
   Min 0.000 0.000 0.000
   Max 15.000 15.000 15.000
number_of_total_epochs 0.011
   Count 71 98 169
   mean 10.141 12.918 11.751
   SD 7.769 8.196 8.113
   Median 9.000 12.000 10.000
   Q1,Q3 5.000, 14.000 7.000, 18.000 6.000, 17.000
   Min 1.000 1.000 1.000
   Max 37.000 51.000 51.000
number_of_fast_epochs_frequency 0.651
   Count 71 98 169
   mean 1.275 1.377 1.334
   SD 1.138 1.188 1.165
   Median 1.115 1.108 1.115
   Q1,Q3 0.395, 1.765 0.378, 2.274 0.384, 1.931
   Min 0.000 0.000 0.000
   Max 5.168 4.601 5.168
number_of_medium_epochs_frequency 0.027
   Count 71 98 169
   mean 0.729 1.019 0.897
   SD 0.654 0.837 0.777
   Median 0.622 0.847 0.743
   Q1,Q3 0.300, 0.994 0.339, 1.529 0.315, 1.268
   Min 0.000 0.000 0.000
   Max 3.240 4.302 4.302
number_of_slow_epochs_frequency 0.002
   Count 71 98 169
   mean 0.899 1.295 1.129
   SD 0.722 0.850 0.820
   Median 0.779 1.214 0.980
   Q1,Q3 0.366, 1.406 0.689, 1.832 0.497, 1.649
   Min 0.000 0.000 0.000
   Max 3.232 3.648 3.648
number_of_total_epochs_frequency 0.008
   Count 71 98 169
   mean 2.903 3.691 3.360
   SD 1.803 2.027 1.970
   Median 2.512 3.430 3.091
   Q1,Q3 1.689, 3.855 2.159, 4.985 1.779, 4.427
   Min 0.326 0.302 0.302
   Max 7.846 9.386 9.386
motility_index 0.023
   Count 71 98 169
   mean 18.329 18.001 18.139
   SD 0.928 0.873 0.908
   Median 18.397 18.086 18.147
   Q1,Q3 17.833, 19.057 17.521, 18.615 17.589, 18.737
   Min 15.981 15.750 15.750
   Max 20.278 20.154 20.278
# Per-subject RMSSD summaries for the control recordings ("Jack's Data").
# 1. Within each subject and epoch type, summarise the epoch-level RMSSD of
#    time and magnitude (mean / median / SD) and count the epochs.
# 2. Spread the epoch types into columns so that each subject is one row;
#    the resulting columns are named "<statistic>_<epoch_type>".
# 3. Tag every row with its data source for the between-source comparison.
Control_Patients_5min_RMSSD <- Control_Patients_5min %>%
  group_by(combined_ID, epoch_type) %>%
  summarize(
    mean_RMSSD_Time = mean(epoch_RMSSD_Time),
    median_RMSSD_Time = median(epoch_RMSSD_Time),
    sd_RMSSD_Time = sd(epoch_RMSSD_Time),
    mean_RMSSD_Magnitude = mean(epoch_RMSSD_Magnitude),
    median_RMSSD_Magnitude = median(epoch_RMSSD_Magnitude),
    sd_RMSSD_Magnitude = sd(epoch_RMSSD_Magnitude),
    n_epochs = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    id_cols = combined_ID,
    names_from = epoch_type,
    values_from = c(
      mean_RMSSD_Time, median_RMSSD_Time, sd_RMSSD_Time,
      mean_RMSSD_Magnitude, median_RMSSD_Magnitude, sd_RMSSD_Magnitude,
      n_epochs
    )
  ) %>%
  mutate(source = "Jack's Data")

# Subject-level medians pooled over all epochs, ignoring the epoch type.
Control_Patients_5min_total_epoch <- Control_Patients_5min %>%
  group_by(combined_ID) %>%
  summarize(
    median_RMSSD_Time_total_epoch = median(epoch_RMSSD_Time),
    median_RMSSD_Magnitude_total_epoch = median(epoch_RMSSD_Magnitude),
    .groups = "drop"
  )

# Name the join key explicitly: combined_ID is the only column the two tables
# share, so this matches the former implicit natural join but no longer
# depends on (or prints a message about) whichever columns happen to overlap.
Control_Patients_5min_RMSSD <- Control_Patients_5min_RMSSD %>%
  left_join(Control_Patients_5min_total_epoch, by = "combined_ID")
    
# Per-subject RMSSD summaries for the EVA recordings, built the same way as
# the control table: summarise per subject and epoch type, widen so each
# subject is one row ("<statistic>_<epoch_type>" columns), then tag the source.
EVA_Patients_5min_RMSSD <- EVA_Patients_5min %>%
  group_by(combined_ID, epoch_type) %>%
  summarize(
    mean_RMSSD_Time = mean(epoch_RMSSD_Time),
    median_RMSSD_Time = median(epoch_RMSSD_Time),
    sd_RMSSD_Time = sd(epoch_RMSSD_Time),
    mean_RMSSD_Magnitude = mean(epoch_RMSSD_Magnitude),
    median_RMSSD_Magnitude = median(epoch_RMSSD_Magnitude),
    sd_RMSSD_Magnitude = sd(epoch_RMSSD_Magnitude),
    n_epochs = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    id_cols = combined_ID,
    names_from = epoch_type,
    values_from = c(
      mean_RMSSD_Time, median_RMSSD_Time, sd_RMSSD_Time,
      mean_RMSSD_Magnitude, median_RMSSD_Magnitude, sd_RMSSD_Magnitude,
      n_epochs
    )
  ) %>%
  mutate(source = "EVA Data")

# Subject-level medians pooled over all epochs, ignoring the epoch type.
EVA_Patients_5min_total_epoch <- EVA_Patients_5min %>%
  group_by(combined_ID) %>%
  summarize(
    median_RMSSD_Time_total_epoch = median(epoch_RMSSD_Time),
    median_RMSSD_Magnitude_total_epoch = median(epoch_RMSSD_Magnitude),
    .groups = "drop"
  )

# Name the join key explicitly: combined_ID is the only column the two tables
# share, so this matches the former implicit natural join but no longer
# depends on (or prints a message about) whichever columns happen to overlap.
EVA_Patients_5min_RMSSD <- EVA_Patients_5min_RMSSD %>%
  left_join(EVA_Patients_5min_total_epoch, by = "combined_ID")

# Stack the control and EVA per-subject RMSSD tables into one analysis frame.
all_data_RMSSD <- rbind(Control_Patients_5min_RMSSD, EVA_Patients_5min_RMSSD)

# tableby settings: Kruskal-Wallis test for numeric variables, chi-square for
# categorical ones, plus a Total column. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = 'Count', Nmiss = 'Missing Values', mean = 'mean', sd = 'SD',
    median = 'Median', q1q3 = 'Q1,Q3', min = 'Min', max = 'Max'
  )
)


# Compare the per-subject median RMSSD values and epoch counts between the
# two data sources. Backticks are required because the widened column names
# contain a space (e.g. "fast epoch").
tab.test <- tableby(
  source ~ `median_RMSSD_Time_fast epoch` +
    `median_RMSSD_Time_medium epoch` +
    `median_RMSSD_Time_slow epoch` +
    median_RMSSD_Time_total_epoch +
    `median_RMSSD_Magnitude_fast epoch` +
    `median_RMSSD_Magnitude_medium epoch` +
    `median_RMSSD_Magnitude_slow epoch` +
    median_RMSSD_Magnitude_total_epoch +
    `n_epochs_fast epoch` +
    `n_epochs_medium epoch` +
    `n_epochs_slow epoch`,
  data = all_data_RMSSD, control = mycontrols
)

summary(tab.test)
EVA Data (N=71) Jack’s Data (N=98) Total (N=169) p value
median_RMSSD_Time_fast epoch 0.012
   Count 61 83 144
   Missing Values 10 15 25
   mean 53.136 50.465 51.597
   SD 6.481 4.285 5.469
   Median 52.540 49.280 50.237
   Q1,Q3 48.160, 57.180 47.425, 53.165 47.701, 55.321
   Min 41.540 43.860 41.540
   Max 71.790 63.850 71.790
median_RMSSD_Time_medium epoch 0.005
   Count 58 87 145
   Missing Values 13 11 24
   mean 67.132 63.418 64.904
   SD 9.843 7.848 8.859
   Median 65.590 61.950 63.230
   Q1,Q3 60.888, 70.885 58.100, 65.892 59.030, 68.370
   Min 50.480 52.110 50.480
   Max 101.345 96.330 101.345
median_RMSSD_Time_slow epoch 0.042
   Count 59 90 149
   Missing Values 12 8 20
   mean 83.876 78.458 80.604
   SD 14.217 9.098 11.664
   Median 80.715 77.975 79.480
   Q1,Q3 73.755, 89.062 71.345, 84.540 72.840, 86.380
   Min 64.010 52.200 52.200
   Max 138.230 101.360 138.230
median_RMSSD_Time_total_epoch 0.968
   Count 71 98 169
   mean 64.233 64.060 64.133
   SD 12.860 10.438 11.481
   Median 63.615 62.395 63.110
   Q1,Q3 57.370, 69.165 57.683, 70.236 57.460, 69.530
   Min 46.380 46.620 46.380
   Max 138.230 98.730 138.230
median_RMSSD_Magnitude_fast epoch 0.068
   Count 61 83 144
   Missing Values 10 15 25
   mean 73.035 65.556 68.724
   SD 28.061 24.956 26.482
   Median 72.370 61.095 66.670
   Q1,Q3 54.360, 86.700 47.792, 75.675 49.625, 82.172
   Min 24.450 20.160 20.160
   Max 158.900 157.550 158.900
median_RMSSD_Magnitude_medium epoch 0.249
   Count 58 87 145
   Missing Values 13 11 24
   mean 60.919 56.492 58.263
   SD 25.692 23.552 24.440
   Median 53.445 49.800 51.210
   Q1,Q3 42.785, 71.294 40.300, 63.960 40.840, 67.570
   Min 27.860 28.345 27.860
   Max 151.905 149.150 151.905
median_RMSSD_Magnitude_slow epoch 0.045
   Count 59 90 149
   Missing Values 12 8 20
   mean 47.339 43.154 44.811
   SD 24.965 26.142 25.679
   Median 43.950 35.885 40.850
   Q1,Q3 34.177, 49.892 26.372, 49.825 28.360, 50.255
   Min 17.460 20.100 17.460
   Max 177.435 180.510 180.510
median_RMSSD_Magnitude_total_epoch 0.010
   Count 71 98 169
   mean 59.947 52.018 55.349
   SD 24.122 21.697 23.014
   Median 55.440 47.945 51.170
   Q1,Q3 45.390, 71.448 37.714, 59.303 39.895, 62.280
   Min 22.125 21.700 21.700
   Max 139.395 136.030 139.395
n_epochs_fast epoch 0.182
   Count 61 83 144
   Missing Values 10 15 25
   mean 4.787 5.578 5.243
   SD 3.769 4.370 4.131
   Median 4.000 5.000 5.000
   Q1,Q3 2.000, 7.000 2.500, 7.000 2.000, 7.000
   Min 1.000 1.000 1.000
   Max 17.000 27.000 27.000
n_epochs_medium epoch 0.031
   Count 58 87 145
   Missing Values 13 11 24
   mean 3.172 3.977 3.655
   SD 2.968 2.766 2.866
   Median 2.500 3.000 3.000
   Q1,Q3 1.000, 4.000 2.000, 6.000 2.000, 5.000
   Min 1.000 1.000 1.000
   Max 18.000 13.000 18.000
n_epochs_slow epoch 0.028
   Count 59 90 149
   Missing Values 12 8 20
   mean 4.136 5.078 4.705
   SD 3.293 3.219 3.270
   Median 3.000 4.000 4.000
   Q1,Q3 1.500, 5.500 3.000, 7.000 2.000, 6.000
   Min 1.000 1.000 1.000
   Max 15.000 15.000 15.000

4.2 Table 2

4.2.1 Compare GI symptoms between 3 EVA Groups

# Reload sheet 2 of the final spreadsheet, skipping its first row.
final_spreadsheet <- read_excel("C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Dataset/Final spreadsheet_controls and PWH.xlsx", sheet = 2, skip = 1)

# final_spreadsheet <- final_spreadsheet %>% filter(!is.na(SBTT...34), !is.na(cass_total))

# Assign each participant to one of three groups:
#   * SBTT above 360                  -> "prolonged_SBTT" (regardless of CASS)
#   * SBTT at most 360, CASS >= 3     -> "normal_SBTT_abnormal_CASS"
#   * SBTT at most 360, CASS < 3      -> "normal_SBTT_normal_CASS"
# Rows where SBTT is missing, or where SBTT <= 360 and CASS is missing, match
# no branch and therefore get groups = NA.
# SBTT...34 is coerced to numeric once in a temporary column (removed again at
# the end of the mutate) rather than three times, so any coercion warning is
# raised only once.
final_spreadsheet <- final_spreadsheet %>%
  mutate(
    sbtt_numeric = as.numeric(SBTT...34),
    groups = case_when(
      sbtt_numeric > 360 ~ "prolonged_SBTT",
      cass_total >= 3 & sbtt_numeric <= 360 ~ "normal_SBTT_abnormal_CASS",
      cass_total < 3 & sbtt_numeric <= 360 ~ "normal_SBTT_normal_CASS"
    ),
    sbtt_numeric = NULL
  )
# Attach each EVA participant's symptom-score columns and SBTT/CASS group,
# matching the recording's combined_ID to the spreadsheet's ID.
EVA_Patients_5min <- EVA_Patients_5min %>%
  left_join(
    final_spreadsheet %>%
      select(
        ID, ortho_intolerance_score, vasomotor_score, secretomotor_score,
        gi_score, bladder_score, pupillometer_score, total_compass_31,
        a_heartburn_regurgitation, b_fullness_early_satiety,
        c_nausea_vomiting, d_bloating, e_upper_abdominal_pain,
        f_lower_abdominal_pain, total_pagi_sym_score, groups
      ),
    by = c("combined_ID" = "ID")
  )

# Controls form their own comparison group.
Control_Patients_5min$groups <- "Jack's Data"

# NOTE(review): rbind() on data frames requires both inputs to have the same
# set of columns - confirm Control_Patients_5min already carries the score
# columns joined onto EVA_Patients_5min above.
all_data <- rbind(Control_Patients_5min, EVA_Patients_5min)

# Keep the first row per subject, then drop the grouping so that later verbs
# applied to all_data do not silently run once per combined_ID.
all_data <- all_data %>%
  group_by(combined_ID) %>%
  slice(1) %>%
  ungroup()
# tableby settings: Kruskal-Wallis test for numeric variables, chi-square for
# categorical ones, plus a Total column. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = 'Count', Nmiss = 'Missing Values', mean = 'mean', sd = 'SD',
    median = 'Median', q1q3 = 'Q1,Q3', min = 'Min', max = 'Max'
  )
)


# Compare the symptom scores across the EVA groups only: the filter removes
# the "Jack's Data" controls (and, because NA != "..." is NA, any row whose
# group is missing).
tab.test <- tableby(
  groups ~ ortho_intolerance_score + vasomotor_score + secretomotor_score +
    gi_score + bladder_score + pupillometer_score + total_compass_31 +
    a_heartburn_regurgitation + b_fullness_early_satiety +
    c_nausea_vomiting + d_bloating + e_upper_abdominal_pain +
    f_lower_abdominal_pain + total_pagi_sym_score,
  data = all_data %>% filter(groups != "Jack's Data"), control = mycontrols
)

summary(tab.test)
normal_SBTT_abnormal_CASS (N=24) normal_SBTT_normal_CASS (N=30) prolonged_SBTT (N=16) Total (N=70) p value
ortho_intolerance_score 0.833
   Count 24 30 16 70
   mean 2.417 2.533 2.125 2.400
   SD 2.394 2.097 2.156 2.190
   Median 2.000 3.000 2.000 2.000
   Q1,Q3 0.000, 4.250 0.000, 4.000 0.000, 4.000 0.000, 4.000
   Min 0.000 0.000 0.000 0.000
   Max 6.000 6.000 6.000 6.000
vasomotor_score 0.786
   Count 24 30 16 70
   mean 0.417 0.267 0.312 0.329
   SD 0.974 0.828 0.873 0.880
   Median 0.000 0.000 0.000 0.000
   Q1,Q3 0.000, 0.000 0.000, 0.000 0.000, 0.000 0.000, 0.000
   Min 0.000 0.000 0.000 0.000
   Max 3.000 3.000 3.000 3.000
secretomotor_score 0.113
   Count 24 30 16 70
   mean 2.125 1.433 1.312 1.643
   SD 1.454 1.431 1.078 1.394
   Median 2.000 1.500 1.500 2.000
   Q1,Q3 1.000, 3.000 0.000, 2.000 0.000, 2.000 0.000, 3.000
   Min 0.000 0.000 0.000 0.000
   Max 5.000 5.000 3.000 5.000
gi_score 0.865
   Count 24 30 16 70
   mean 7.042 6.933 7.938 7.200
   SD 5.069 4.185 4.932 4.624
   Median 7.000 8.000 6.500 7.000
   Q1,Q3 3.000, 11.250 3.250, 9.750 4.000, 12.000 3.000, 10.750
   Min 0.000 0.000 0.000 0.000
   Max 18.000 15.000 16.000 18.000
bladder_score 0.950
   Count 24 30 16 70
   mean 1.042 1.133 1.000 1.071
   SD 1.628 2.047 1.592 1.788
   Median 0.000 0.000 0.000 0.000
   Q1,Q3 0.000, 1.250 0.000, 1.000 0.000, 1.500 0.000, 1.000
   Min 0.000 0.000 0.000 0.000
   Max 6.000 8.000 5.000 8.000
pupillometer_score 0.321
   Count 24 30 16 70
   mean 3.500 3.467 2.125 3.171
   SD 3.867 3.371 2.802 3.435
   Median 2.500 3.000 2.000 2.000
   Q1,Q3 0.000, 4.500 0.250, 5.000 0.000, 3.000 0.000, 5.000
   Min 0.000 0.000 0.000 0.000
   Max 13.000 14.000 11.000 14.000
total_compass_31 0.724
   Count 24 30 16 70
   mean 16.542 15.767 14.812 15.814
   SD 8.465 9.424 7.985 8.688
   Median 18.000 16.500 14.000 14.500
   Q1,Q3 10.500, 24.000 8.500, 19.750 10.500, 18.000 9.250, 20.750
   Min 3.000 3.000 5.000 3.000
   Max 30.000 38.000 37.000 38.000
a_heartburn_regurgitation 0.258
   Count 24 30 16 70
   mean 1.006 0.533 0.911 0.782
   SD 1.081 0.777 1.234 1.011
   Median 0.571 0.357 0.214 0.429
   Q1,Q3 0.107, 1.464 0.000, 0.571 0.000, 1.714 0.000, 1.107
   Min 0.000 0.000 0.000 0.000
   Max 3.857 3.714 4.000 4.000
b_fullness_early_satiety 0.773
   Count 24 30 16 70
   mean 0.979 0.858 1.156 0.968
   SD 0.964 0.806 1.087 0.924
   Median 0.500 0.750 0.750 0.750
   Q1,Q3 0.250, 1.750 0.250, 1.188 0.250, 2.062 0.250, 1.500
   Min 0.000 0.000 0.000 0.000
   Max 3.000 3.000 3.750 3.750
c_nausea_vomiting 0.213
   Count 24 30 16 70
   mean 0.500 0.344 0.500 0.433
   SD 0.606 0.766 0.911 0.745
   Median 0.333 0.000 0.000 0.000
   Q1,Q3 0.000, 0.750 0.000, 0.250 0.000, 0.583 0.000, 0.667
   Min 0.000 0.000 0.000 0.000
   Max 2.333 3.333 3.000 3.333
d_bloating 0.696
   Count 24 30 16 70
   mean 1.417 1.533 1.188 1.414
   SD 1.487 1.345 1.276 1.367
   Median 1.000 1.500 0.750 1.000
   Q1,Q3 0.000, 2.000 0.000, 2.500 0.000, 2.500 0.000, 2.500
   Min 0.000 0.000 0.000 0.000
   Max 5.000 4.500 3.000 5.000
e_upper_abdominal_pain 0.476
   Count 24 30 16 70
   mean 0.938 0.600 0.781 0.757
   SD 1.106 1.012 1.224 1.089
   Median 0.250 0.000 0.000 0.000
   Q1,Q3 0.000, 2.000 0.000, 1.000 0.000, 1.250 0.000, 1.500
   Min 0.000 0.000 0.000 0.000
   Max 3.000 3.000 3.000 3.000
f_lower_abdominal_pain 0.488
   Count 24 30 16 70
   mean 1.104 0.883 0.719 0.921
   SD 1.142 1.187 1.238 1.175
   Median 1.000 0.000 0.000 0.000
   Q1,Q3 0.000, 2.000 0.000, 2.000 0.000, 1.000 0.000, 2.000
   Min 0.000 0.000 0.000 0.000
   Max 3.000 4.000 4.000 4.000
total_pagi_sym_score 0.644
   Count 24 30 16 70
   mean 0.991 0.792 0.876 0.879
   SD 0.801 0.776 0.735 0.770
   Median 0.923 0.589 0.637 0.652
   Q1,Q3 0.357, 1.438 0.208, 1.061 0.156, 1.543 0.208, 1.382
   Min 0.000 0.000 0.000 0.000
   Max 2.901 3.091 2.077 3.091
# all_data %>% filter(groups == "prolonged_SBTT")
# Sheet 1 of the final spreadsheet (first row skipped), restricted to the
# rows that have a non-missing mcass_brsa value.
final_spreadsheet_control <- read_excel("C:/Users/zhaoz03/OneDrive - The Mount Sinai Hospital/MY PROJECTS/Project 11. GI Pressue EVA Study/Dataset/Final spreadsheet_controls and PWH.xlsx", sheet = 1, skip = 1) %>%
  filter(!is.na(mcass_brsa))

# Bring each control's SBTT onto the recording data (combined_ID matches
# SUBJID), then derive two helper columns:
#   total_time_minue - total_time multiplied by 60 (presumably hours to
#                      minutes; the column name's spelling is kept as-is
#                      because other code may refer to it)
#   SBTT...34        - numeric copy of SBTT under the column name used on the
#                      participant sheet
EVA_Controls_5min <- EVA_Controls_5min %>%
  left_join(
    select(final_spreadsheet_control, SUBJID, SBTT),
    by = c("combined_ID" = "SUBJID")
  ) %>%
  mutate(
    total_time_minue = total_time * 60,
    `SBTT...34` = as.numeric(SBTT)
  )
# Rebuild the per-subject RMSSD table for the controls, this time labelled
# with groups = "Jack's Data" so it can be compared against the EVA groups.
# 1. Summarise the epoch-level RMSSD of time and magnitude per subject and
#    epoch type (mean / median / SD) and count the epochs.
# 2. Widen so each subject is one row ("<statistic>_<epoch_type>" columns).
Control_Patients_5min_RMSSD <- Control_Patients_5min %>%
  group_by(combined_ID, epoch_type) %>%
  summarize(
    mean_RMSSD_Time = mean(epoch_RMSSD_Time),
    median_RMSSD_Time = median(epoch_RMSSD_Time),
    sd_RMSSD_Time = sd(epoch_RMSSD_Time),
    mean_RMSSD_Magnitude = mean(epoch_RMSSD_Magnitude),
    median_RMSSD_Magnitude = median(epoch_RMSSD_Magnitude),
    sd_RMSSD_Magnitude = sd(epoch_RMSSD_Magnitude),
    n_epochs = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    id_cols = combined_ID,
    names_from = epoch_type,
    values_from = c(
      mean_RMSSD_Time, median_RMSSD_Time, sd_RMSSD_Time,
      mean_RMSSD_Magnitude, median_RMSSD_Magnitude, sd_RMSSD_Magnitude,
      n_epochs
    )
  ) %>%
  mutate(groups = "Jack's Data")

# Subject-level medians pooled over all epochs, ignoring the epoch type.
Control_Patients_5min_total_epoch <- Control_Patients_5min %>%
  group_by(combined_ID) %>%
  summarize(
    median_RMSSD_Time_total_epoch = median(epoch_RMSSD_Time),
    median_RMSSD_Magnitude_total_epoch = median(epoch_RMSSD_Magnitude),
    .groups = "drop"
  )

# Name the join key explicitly: combined_ID is the only column the two tables
# share, so this matches the former implicit natural join but no longer
# depends on (or prints a message about) whichever columns happen to overlap.
Control_Patients_5min_RMSSD <- Control_Patients_5min_RMSSD %>%
  left_join(Control_Patients_5min_total_epoch, by = "combined_ID")
    
# Rebuild the per-subject RMSSD table for the EVA recordings, carrying the
# `groups` column through the summary so each subject keeps its group label.
# `groups` is part of both the grouping and the pivot id columns, so it
# survives as a regular column next to combined_ID.
EVA_Patients_5min_RMSSD <- EVA_Patients_5min %>%
  group_by(combined_ID, groups, epoch_type) %>%
  summarize(
    mean_RMSSD_Time = mean(epoch_RMSSD_Time),
    median_RMSSD_Time = median(epoch_RMSSD_Time),
    sd_RMSSD_Time = sd(epoch_RMSSD_Time),
    mean_RMSSD_Magnitude = mean(epoch_RMSSD_Magnitude),
    median_RMSSD_Magnitude = median(epoch_RMSSD_Magnitude),
    sd_RMSSD_Magnitude = sd(epoch_RMSSD_Magnitude),
    n_epochs = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    id_cols = c(combined_ID, groups),
    names_from = epoch_type,
    values_from = c(
      mean_RMSSD_Time, median_RMSSD_Time, sd_RMSSD_Time,
      mean_RMSSD_Magnitude, median_RMSSD_Magnitude, sd_RMSSD_Magnitude,
      n_epochs
    )
  )

# Subject-level medians pooled over all epochs, ignoring the epoch type.
EVA_Patients_5min_total_epoch <- EVA_Patients_5min %>%
  group_by(combined_ID) %>%
  summarize(
    median_RMSSD_Time_total_epoch = median(epoch_RMSSD_Time),
    median_RMSSD_Magnitude_total_epoch = median(epoch_RMSSD_Magnitude),
    .groups = "drop"
  )

# Name the join key explicitly: combined_ID is the only column the two tables
# share, so this matches the former implicit natural join but no longer
# depends on (or prints a message about) whichever columns happen to overlap.
EVA_Patients_5min_RMSSD <- EVA_Patients_5min_RMSSD %>%
  left_join(EVA_Patients_5min_total_epoch, by = "combined_ID")

# Stack controls and EVA subjects; rbind() on data frames matches columns by
# name, so the differing position of `groups` in the two tables is harmless.
all_data_RMSSD <- rbind(Control_Patients_5min_RMSSD, EVA_Patients_5min_RMSSD)

4.2.2 Table 1 Extension

# tableby settings: Kruskal-Wallis test for numeric variables, chi-square for
# categorical ones, plus a Total column. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = 'Count', Nmiss = 'Missing Values', mean = 'mean', sd = 'SD',
    median = 'Median', q1q3 = 'Q1,Q3', min = 'Min', max = 'Max'
  )
)


# Compare the recording-level contraction and epoch variables across every
# level of `groups` present in all_data (one row per subject).
tab.test <- tableby(
  groups ~ total_time + total_no_missing_time + no_missing_time_ratio +
    total_number_of_contractions + total_contractions_frequency +
    sum_of_amplitude + total_number_of_contractions_over26.7 +
    total_contraction_rate_0 + total_contraction_rate_1_3 +
    total_contraction_rate_4_6 + total_contraction_rate_7_9 +
    total_contraction_rate_over10 +
    number_of_contractions_over26.7_per_hour +
    imputed_total_number_of_contractions + imputed_sum_of_amplitude +
    number_of_fast_epochs + number_of_medium_epochs +
    number_of_slow_epochs + number_of_total_epochs +
    number_of_fast_epochs_frequency + number_of_medium_epochs_frequency +
    number_of_slow_epochs_frequency + number_of_total_epochs_frequency +
    motility_index,
  data = all_data, control = mycontrols
)

summary(tab.test)
Jack’s Data (N=98) normal_SBTT_abnormal_CASS (N=24) normal_SBTT_normal_CASS (N=30) prolonged_SBTT (N=16) Total (N=168) p value
total_time < 0.001
   Count 98 24 30 16 168
   mean 3.762 3.848 4.415 8.778 4.369
   SD 1.160 1.064 1.139 4.746 2.303
   Median 3.706 3.989 4.728 7.244 3.953
   Q1,Q3 2.910, 4.409 3.136, 4.507 3.754, 5.299 6.439, 8.270 3.236, 4.987
   Min 1.312 1.779 1.664 5.026 1.312
   Max 6.590 6.081 5.993 23.128 23.128
total_no_missing_time 0.004
   Count 98 24 30 16 168
   mean 3.518 3.027 3.021 4.686 3.471
   SD 1.044 1.203 1.294 1.964 1.295
   Median 3.428 2.870 2.875 4.985 3.343
   Q1,Q3 2.870, 4.104 2.361, 3.726 1.906, 4.236 3.099, 5.907 2.600, 4.186
   Min 1.307 1.107 1.074 1.010 1.010
   Max 6.212 6.064 5.555 8.811 8.811
no_missing_time_ratio < 0.001
   Count 98 24 30 16 168
   mean 0.942 0.777 0.706 0.638 0.847
   SD 0.084 0.177 0.251 0.296 0.202
   Median 0.971 0.815 0.761 0.723 0.941
   Q1,Q3 0.939, 0.990 0.725, 0.894 0.601, 0.926 0.483, 0.834 0.784, 0.981
   Min 0.442 0.311 0.224 0.103 0.103
   Max 1.000 0.997 0.991 0.981 1.000
total_number_of_contractions 0.017
   Count 98 24 30 16 168
   mean 786.857 955.042 884.067 1183.812 866.048
   SD 392.159 372.587 483.217 578.218 439.970
   Median 734.000 941.500 724.500 1108.500 781.500
   Q1,Q3 517.500, 951.750 674.000, 1086.500 545.750, 1190.500 698.500, 1522.250 553.500, 1073.000
   Min 174.000 428.000 252.000 555.000 174.000
   Max 2517.000 1753.000 2006.000 2582.000 2582.000
total_contractions_frequency < 0.001
   Count 98 24 30 16 168
   mean 226.842 328.487 314.751 288.198 262.904
   SD 91.448 76.671 193.568 153.865 127.379
   Median 218.881 334.502 272.254 231.708 254.795
   Q1,Q3 160.092, 272.262 280.663, 373.353 211.168, 326.458 183.643, 355.167 174.421, 328.764
   Min 52.572 152.216 118.919 122.110 52.572
   Max 433.827 484.767 1122.063 581.841 1122.063
sum_of_amplitude 0.009
   Count 98 24 30 16 168
   mean 100940.428 112627.251 103712.522 145984.258 107394.879
   SD 37378.788 39934.317 48057.118 57372.612 43604.595
   Median 96916.642 106655.760 87375.001 148375.073 101171.150
   Q1,Q3 76487.509, 125008.950 87945.762, 136699.811 78928.684, 132581.772 105478.880, 165435.091 77920.505, 132384.909
   Min 31958.930 43679.582 34593.705 60968.451 31958.930
   Max 224859.417 203251.944 237168.559 262383.401 262383.401
total_number_of_contractions_over26.7 0.002
   Count 98 24 30 16 168
   mean 90.847 143.458 121.967 152.562 109.798
   SD 68.169 74.209 109.690 95.568 83.323
   Median 72.000 143.000 86.500 148.500 89.000
   Q1,Q3 41.000, 127.500 103.250, 190.500 50.250, 147.000 74.000, 211.750 46.750, 151.750
   Min 12.000 8.000 9.000 31.000 8.000
   Max 338.000 289.000 441.000 334.000 441.000
total_contraction_rate_0 0.166
   Count 98 24 30 16 168
   mean 0.247 0.175 0.258 0.291 0.243
   SD 0.153 0.101 0.170 0.223 0.159
   Median 0.222 0.170 0.241 0.231 0.219
   Q1,Q3 0.122, 0.341 0.094, 0.232 0.142, 0.307 0.135, 0.419 0.128, 0.327
   Min 0.000 0.012 0.038 0.050 0.000
   Max 0.685 0.389 0.869 0.970 0.970
total_contraction_rate_1_3 0.078
   Count 98 24 30 16 168
   mean 0.344 0.308 0.327 0.356 0.337
   SD 0.093 0.056 0.095 0.105 0.091
   Median 0.357 0.319 0.338 0.384 0.345
   Q1,Q3 0.272, 0.411 0.263, 0.352 0.244, 0.400 0.334, 0.410 0.272, 0.401
   Min 0.110 0.212 0.114 0.029 0.029
   Max 0.566 0.396 0.468 0.488 0.566
total_contraction_rate_4_6 0.314
   Count 98 24 30 16 168
   mean 0.187 0.209 0.180 0.170 0.188
   SD 0.071 0.059 0.066 0.066 0.068
   Median 0.192 0.211 0.183 0.185 0.189
   Q1,Q3 0.133, 0.225 0.175, 0.258 0.135, 0.213 0.128, 0.215 0.133, 0.227
   Min 0.041 0.106 0.002 0.000 0.000
   Max 0.364 0.318 0.366 0.251 0.366
total_contraction_rate_7_9 0.045
   Count 98 24 30 16 168
   mean 0.136 0.160 0.120 0.098 0.133
   SD 0.082 0.069 0.070 0.060 0.077
   Median 0.115 0.145 0.101 0.096 0.114
   Q1,Q3 0.081, 0.192 0.109, 0.212 0.077, 0.153 0.051, 0.134 0.081, 0.185
   Min 0.005 0.077 0.001 0.000 0.000
   Max 0.455 0.289 0.336 0.219 0.455
total_contraction_rate_over10 0.002
   Count 98 24 30 16 168
   mean 0.085 0.147 0.115 0.085 0.099
   SD 0.071 0.067 0.112 0.070 0.082
   Median 0.067 0.151 0.078 0.060 0.077
   Q1,Q3 0.030, 0.125 0.100, 0.187 0.040, 0.148 0.039, 0.124 0.038, 0.143
   Min 0.000 0.025 0.002 0.000 0.000
   Max 0.283 0.311 0.405 0.266 0.405
number_of_contractions_over26.7_per_hour < 0.001
   Count 98 24 30 16 168
   mean 27.688 53.789 43.732 39.017 35.360
   SD 23.529 38.221 35.782 32.906 30.653
   Median 20.876 48.202 25.455 30.679 25.092
   Q1,Q3 11.757, 37.625 29.563, 61.081 18.510, 63.387 16.338, 49.153 15.182, 46.148
   Min 3.119 4.460 7.251 7.841 3.119
   Max 155.521 168.976 151.467 124.414 168.976
imputed_total_number_of_contractions < 0.001
   Count 98 24 30 16 168
   mean 842.918 1311.250 1286.867 2494.000 1146.345
   SD 398.189 474.939 603.969 1770.949 838.864
   Median 788.000 1202.500 1177.500 1795.500 968.000
   Q1,Q3 577.000, 1006.000 978.750, 1659.250 782.750, 1684.750 1181.500, 3387.500 672.000, 1336.000
   Min 204.000 630.000 332.000 641.000 204.000
   Max 2526.000 2463.000 2731.000 6210.000 6210.000
imputed_sum_of_amplitude < 0.001
   Count 98 24 30 16 168
   mean 108176.988 151095.293 153274.484 311977.985 141770.918
   SD 38328.935 42423.591 59153.165 244119.520 102643.755
   Median 103445.589 149617.504 140137.128 207581.029 120036.438
   Q1,Q3 79719.797, 129270.590 117704.583, 177376.599 110148.167, 193260.523 169271.089, 325802.507 89592.153, 161196.714
   Min 34829.877 83591.146 62203.157 112936.509 34829.877
   Max 225832.269 250021.028 295903.363 950697.962 950697.962
number_of_fast_epochs 0.424
   Count 98 24 30 16 168
   mean 4.724 4.833 4.100 3.125 4.476
   SD 4.497 3.841 4.358 2.941 4.255
   Median 4.000 4.000 3.000 2.000 4.000
   Q1,Q3 1.000, 7.000 2.750, 7.000 1.000, 6.000 1.000, 5.250 1.000, 7.000
   Min 0.000 0.000 0.000 0.000 0.000
   Max 27.000 15.000 17.000 8.000 27.000
number_of_medium_epochs 0.029
   Count 98 24 30 16 168
   mean 3.531 3.292 2.367 1.938 3.137
   SD 2.894 3.155 3.306 1.692 2.955
   Median 3.000 2.000 1.500 1.000 2.000
   Q1,Q3 1.000, 5.000 1.000, 4.500 1.000, 3.000 1.000, 3.000 1.000, 5.000
   Min 0.000 0.000 0.000 0.000 0.000
   Max 13.000 13.000 18.000 5.000 18.000
number_of_slow_epochs 0.058
   Count 98 24 30 16 168
   mean 4.663 3.208 3.533 3.750 4.167
   SD 3.386 3.243 3.521 3.550 3.429
   Median 4.000 3.000 2.500 2.500 3.000
   Q1,Q3 2.000, 6.000 1.000, 5.000 1.000, 4.000 1.000, 5.500 1.000, 6.000
   Min 0.000 0.000 0.000 0.000 0.000
   Max 15.000 12.000 15.000 11.000 15.000
number_of_total_epochs 0.067
   Count 98 24 30 16 168
   mean 12.918 11.333 10.000 8.812 11.780
   SD 8.196 8.560 7.944 6.514 8.129
   Median 12.000 11.500 9.000 7.000 10.000
   Q1,Q3 7.000, 18.000 5.000, 16.000 5.250, 11.750 3.750, 14.000 5.750, 17.000
   Min 1.000 1.000 1.000 1.000 1.000
   Max 51.000 37.000 34.000 22.000 51.000
number_of_fast_epochs_frequency 0.075
   Count 98 24 30 16 168
   mean 1.377 1.544 1.393 0.660 1.335
   SD 1.188 1.038 1.354 0.545 1.168
   Median 1.108 1.310 1.131 0.524 1.107
   Q1,Q3 0.378, 2.274 1.019, 1.919 0.261, 1.976 0.271, 0.990 0.381, 1.954
   Min 0.000 0.000 0.000 0.000 0.000
   Max 4.601 4.450 5.168 1.931 5.168
number_of_medium_epochs_frequency 0.003
   Count 98 24 30 16 168
   mean 1.019 0.969 0.713 0.372 0.895
   SD 0.837 0.718 0.676 0.286 0.779
   Median 0.847 0.886 0.604 0.369 0.735
   Q1,Q3 0.339, 1.529 0.594, 1.195 0.318, 1.000 0.193, 0.524 0.313, 1.274
   Min 0.000 0.000 0.000 0.000 0.000
   Max 4.302 2.757 3.240 0.964 4.302
number_of_slow_epochs_frequency 0.009
   Count 98 24 30 16 168
   mean 1.295 0.890 1.047 0.666 1.133
   SD 0.850 0.790 0.751 0.511 0.820
   Median 1.214 0.779 0.924 0.611 1.005
   Q1,Q3 0.689, 1.832 0.358, 1.332 0.426, 1.661 0.334, 0.915 0.506, 1.650
   Min 0.000 0.000 0.000 0.000 0.000
   Max 3.648 3.232 2.700 1.572 3.648
number_of_total_epochs_frequency < 0.001
   Count 98 24 30 16 168
   mean 3.691 3.402 3.153 1.697 3.364
   SD 2.027 1.946 1.848 0.828 1.975
   Median 3.430 3.144 3.081 1.513 3.096
   Q1,Q3 2.159, 4.985 2.119, 4.317 1.765, 3.962 1.136, 2.300 1.778, 4.430
   Min 0.302 0.552 0.384 0.326 0.302
   Max 9.386 7.846 7.753 3.143 9.386
motility_index 0.015
   Count 98 24 30 16 168
   mean 18.001 18.355 18.079 18.782 18.140
   SD 0.873 0.758 1.022 0.872 0.911
   Median 18.086 18.500 17.924 19.016 18.151
   Q1,Q3 17.521, 18.615 17.857, 18.820 17.565, 18.899 18.020, 19.289 17.584, 18.753
   Min 15.750 16.816 15.981 17.360 15.750
   Max 20.154 19.678 19.826 20.278 20.278
# tableby settings: Kruskal-Wallis test for numeric variables, chi-square for
# categorical ones, plus a Total column. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = 'Count', Nmiss = 'Missing Values', mean = 'mean', sd = 'SD',
    median = 'Median', q1q3 = 'Q1,Q3', min = 'Min', max = 'Max'
  )
)


# Compare the per-subject median RMSSD values and epoch counts across the
# levels of `groups`. Backticks are required because the widened column names
# contain a space (e.g. "fast epoch").
tab.test <- tableby(
  groups ~ `median_RMSSD_Time_fast epoch` +
    `median_RMSSD_Time_medium epoch` +
    `median_RMSSD_Time_slow epoch` +
    median_RMSSD_Time_total_epoch +
    `median_RMSSD_Magnitude_fast epoch` +
    `median_RMSSD_Magnitude_medium epoch` +
    `median_RMSSD_Magnitude_slow epoch` +
    median_RMSSD_Magnitude_total_epoch +
    `n_epochs_fast epoch` +
    `n_epochs_medium epoch` +
    `n_epochs_slow epoch`,
  data = all_data_RMSSD, control = mycontrols
)

summary(tab.test)
Jack’s Data (N=98) normal_SBTT_abnormal_CASS (N=24) normal_SBTT_normal_CASS (N=30) prolonged_SBTT (N=16) Total (N=168) p value
median_RMSSD_Time_fast epoch 0.110
   Count 83 22 24 14 143
   Missing Values 15 2 6 2 25
   mean 50.465 52.652 52.751 54.094 51.541
   SD 4.285 6.043 6.671 7.158 5.446
   Median 49.280 51.373 52.530 53.617 50.190
   Q1,Q3 47.425, 53.165 48.320, 56.224 47.042, 57.315 48.523, 57.059 47.672, 55.133
   Min 43.860 45.105 41.540 43.885 41.540
   Max 63.850 71.460 68.990 71.790 71.790
median_RMSSD_Time_medium epoch 0.018
   Count 87 20 24 13 144
   Missing Values 11 4 6 3 24
   mean 63.418 69.505 65.233 67.195 64.907
   SD 7.848 11.156 7.523 11.727 8.889
   Median 61.950 68.787 64.795 64.430 63.220
   Q1,Q3 58.100, 65.892 64.308, 71.417 60.700, 69.285 60.540, 68.960 59.020, 68.398
   Min 52.110 54.780 53.570 50.480 50.480
   Max 96.330 101.345 80.560 95.890 101.345
median_RMSSD_Time_slow epoch 0.224
   Count 90 19 26 13 148
   Missing Values 8 5 4 3 20
   mean 78.458 84.218 84.863 79.166 80.385
   SD 9.098 15.395 14.989 7.530 11.394
   Median 77.975 80.190 82.920 77.400 79.388
   Q1,Q3 71.345, 84.540 76.070, 88.485 73.539, 95.785 73.420, 84.980 72.803, 86.286
   Min 52.200 64.420 64.010 68.985 52.200
   Max 101.360 138.230 117.070 91.350 138.230
median_RMSSD_Time_total_epoch 0.764
   Count 98 24 30 16 168
   mean 64.060 65.017 62.848 65.641 64.131
   SD 10.438 17.527 9.990 10.044 11.516
   Median 62.395 63.505 62.873 66.955 63.110
   Q1,Q3 57.683, 70.236 55.435, 68.371 58.581, 68.760 60.663, 69.500 57.415, 69.545
   Min 46.620 46.790 46.380 47.840 46.380
   Max 98.730 138.230 83.040 88.090 138.230
median_RMSSD_Magnitude_fast epoch 0.368
   Count 83 22 24 14 143
   Missing Values 15 2 6 2 25
   mean 65.556 73.100 72.270 73.601 68.631
   SD 24.956 29.681 28.263 28.109 26.551
   Median 61.095 72.650 68.257 75.285 66.490
   Q1,Q3 47.792, 75.675 53.935, 84.433 52.824, 87.936 62.897, 79.865 49.400, 82.235
   Min 20.160 24.450 30.250 26.515 20.160
   Max 157.550 158.900 130.970 131.850 158.900
median_RMSSD_Magnitude_medium epoch 0.071
   Count 87 20 24 13 144
   Missing Values 11 4 6 3 24
   mean 56.492 70.329 55.683 52.408 57.910
   SD 23.552 29.318 20.858 21.607 24.152
   Median 49.800 63.540 47.740 47.980 51.140
   Q1,Q3 40.300, 63.960 50.250, 81.223 40.731, 73.131 36.505, 55.990 40.835, 67.465
   Min 28.345 29.715 27.860 30.220 27.860
   Max 149.150 151.905 98.165 112.990 151.905
median_RMSSD_Magnitude_slow epoch 0.117
   Count 90 19 26 13 148
   Missing Values 8 5 4 3 20
   mean 43.154 45.317 42.074 58.182 44.562
   SD 26.142 16.941 18.883 39.329 25.585
   Median 35.885 44.630 42.317 45.160 40.722
   Q1,Q3 26.372, 49.825 34.177, 48.555 29.869, 48.213 38.030, 50.540 28.337, 49.554
   Min 20.100 22.930 17.460 28.085 17.460
   Max 180.510 87.080 111.320 177.435 180.510
median_RMSSD_Magnitude_total_epoch 0.054
   Count 98 24 30 16 168
   mean 52.018 63.340 59.000 55.252 55.190
   SD 21.697 24.650 26.846 17.715 22.990
   Median 47.945 57.235 52.045 55.907 51.043
   Q1,Q3 37.714, 59.303 49.212, 75.714 42.013, 66.657 39.045, 67.170 39.865, 62.062
   Min 21.700 24.450 22.125 31.570 21.700
   Max 136.030 139.395 130.970 93.415 139.395
n_epochs_fast epoch 0.306
   Count 83 22 24 14 143
   Missing Values 15 2 6 2 25
   mean 5.578 5.273 5.125 3.571 5.259
   SD 4.370 3.706 4.297 2.875 4.141
   Median 5.000 4.500 4.500 2.000 5.000
   Q1,Q3 2.500, 7.000 3.000, 7.000 1.750, 6.250 1.000, 5.750 2.000, 7.000
   Min 1.000 1.000 1.000 1.000 1.000
   Max 27.000 15.000 17.000 8.000 27.000
n_epochs_medium epoch 0.042
   Count 87 20 24 13 144
   Missing Values 11 4 6 3 24
   mean 3.977 3.950 2.958 2.385 3.660
   SD 2.766 3.052 3.458 1.557 2.875
   Median 3.000 3.000 2.000 2.000 3.000
   Q1,Q3 2.000, 6.000 2.000, 6.000 1.000, 3.000 1.000, 3.000 1.750, 5.000
   Min 1.000 1.000 1.000 1.000 1.000
   Max 13.000 13.000 18.000 5.000 18.000
n_epochs_slow epoch 0.196
   Count 90 19 26 13 148
   Missing Values 8 5 4 3 20
   mean 5.078 4.053 4.077 4.615 4.730
   SD 3.219 3.135 3.475 3.380 3.267
   Median 4.000 3.000 3.000 4.000 4.000
   Q1,Q3 3.000, 7.000 1.500, 5.000 1.250, 5.500 2.000, 7.000 2.000, 6.250
   Min 1.000 1.000 1.000 1.000 1.000
   Max 15.000 12.000 15.000 11.000 15.000

4.2.3 Post-hoc Pairwise Comparisons across 4 Groups

# tableby settings: Kruskal-Wallis test for numeric variables, chi-square for
# categorical ones, plus a Total column. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = 'Count', Nmiss = 'Missing Values', mean = 'mean', sd = 'SD',
    median = 'Median', q1q3 = 'Q1,Q3', min = 'Min', max = 'Max'
  )
)


# Overall across-group test for a subset of the recording-level variables.
# This call yields one p value per variable, not pairwise group contrasts.
tab.test <- tableby(
  groups ~ total_contractions_frequency + total_contraction_rate_over10 +
    number_of_contractions_over26.7_per_hour +
    number_of_total_epochs_frequency + imputed_sum_of_amplitude +
    motility_index,
  data = all_data, control = mycontrols
)

summary(tab.test)
Jack’s Data (N=98) normal_SBTT_abnormal_CASS (N=24) normal_SBTT_normal_CASS (N=30) prolonged_SBTT (N=16) Total (N=168) p value
total_contractions_frequency < 0.001
   Count 98 24 30 16 168
   mean 226.842 328.487 314.751 288.198 262.904
   SD 91.448 76.671 193.568 153.865 127.379
   Median 218.881 334.502 272.254 231.708 254.795
   Q1,Q3 160.092, 272.262 280.663, 373.353 211.168, 326.458 183.643, 355.167 174.421, 328.764
   Min 52.572 152.216 118.919 122.110 52.572
   Max 433.827 484.767 1122.063 581.841 1122.063
total_contraction_rate_over10 0.002
   Count 98 24 30 16 168
   mean 0.085 0.147 0.115 0.085 0.099
   SD 0.071 0.067 0.112 0.070 0.082
   Median 0.067 0.151 0.078 0.060 0.077
   Q1,Q3 0.030, 0.125 0.100, 0.187 0.040, 0.148 0.039, 0.124 0.038, 0.143
   Min 0.000 0.025 0.002 0.000 0.000
   Max 0.283 0.311 0.405 0.266 0.405
number_of_contractions_over26.7_per_hour < 0.001
   Count 98 24 30 16 168
   mean 27.688 53.789 43.732 39.017 35.360
   SD 23.529 38.221 35.782 32.906 30.653
   Median 20.876 48.202 25.455 30.679 25.092
   Q1,Q3 11.757, 37.625 29.563, 61.081 18.510, 63.387 16.338, 49.153 15.182, 46.148
   Min 3.119 4.460 7.251 7.841 3.119
   Max 155.521 168.976 151.467 124.414 168.976
number_of_total_epochs_frequency < 0.001
   Count 98 24 30 16 168
   mean 3.691 3.402 3.153 1.697 3.364
   SD 2.027 1.946 1.848 0.828 1.975
   Median 3.430 3.144 3.081 1.513 3.096
   Q1,Q3 2.159, 4.985 2.119, 4.317 1.765, 3.962 1.136, 2.300 1.778, 4.430
   Min 0.302 0.552 0.384 0.326 0.302
   Max 9.386 7.846 7.753 3.143 9.386
imputed_sum_of_amplitude < 0.001
   Count 98 24 30 16 168
   mean 108176.988 151095.293 153274.484 311977.985 141770.918
   SD 38328.935 42423.591 59153.165 244119.520 102643.755
   Median 103445.589 149617.504 140137.128 207581.029 120036.438
   Q1,Q3 79719.797, 129270.590 117704.583, 177376.599 110148.167, 193260.523 169271.089, 325802.507 89592.153, 161196.714
   Min 34829.877 83591.146 62203.157 112936.509 34829.877
   Max 225832.269 250021.028 295903.363 950697.962 950697.962
motility_index 0.015
   Count 98 24 30 16 168
   mean 18.001 18.355 18.079 18.782 18.140
   SD 0.873 0.758 1.022 0.872 0.911
   Median 18.086 18.500 17.924 19.016 18.151
   Q1,Q3 17.521, 18.615 17.857, 18.820 17.565, 18.899 18.020, 19.289 17.584, 18.753
   Min 15.750 16.816 15.981 17.360 15.750
   Max 20.154 19.678 19.826 20.278 20.278
# Settings shared by the arsenal::tableby() summaries: run a test per row
# ("kwt" for numeric variables, "chisq" for categorical ones), add a Total
# column and relabel the reported statistics.
mycontrols <- tableby.control(
  test = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  total = TRUE,
  numeric.test = "kwt",
  cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = "Count", Nmiss = "Missing Values", mean = "mean", sd = "SD",
    median = "Median", q1q3 = "Q1,Q3", min = "Min", max = "Max"
  )
)

# Summarise the per-subject RMSSD medians and epoch counts by `groups`.
# Column names containing a space (e.g. "fast epoch") must stay back-quoted.
tab.test <- tableby(
  groups ~ `median_RMSSD_Time_fast epoch` +
    `median_RMSSD_Time_medium epoch` +
    `median_RMSSD_Time_slow epoch` +
    median_RMSSD_Time_total_epoch +
    `median_RMSSD_Magnitude_fast epoch` +
    `median_RMSSD_Magnitude_medium epoch` +
    `median_RMSSD_Magnitude_slow epoch` +
    median_RMSSD_Magnitude_total_epoch +
    `n_epochs_fast epoch` +
    `n_epochs_medium epoch` +
    `n_epochs_slow epoch`,
  data = all_data_RMSSD,
  control = mycontrols
)

summary(tab.test)
Jack’s Data (N=98) normal_SBTT_abnormal_CASS (N=24) normal_SBTT_normal_CASS (N=30) prolonged_SBTT (N=16) Total (N=168) p value
median_RMSSD_Time_fast epoch 0.110
   Count 83 22 24 14 143
   Missing Values 15 2 6 2 25
   mean 50.465 52.652 52.751 54.094 51.541
   SD 4.285 6.043 6.671 7.158 5.446
   Median 49.280 51.373 52.530 53.617 50.190
   Q1,Q3 47.425, 53.165 48.320, 56.224 47.042, 57.315 48.523, 57.059 47.672, 55.133
   Min 43.860 45.105 41.540 43.885 41.540
   Max 63.850 71.460 68.990 71.790 71.790
median_RMSSD_Time_medium epoch 0.018
   Count 87 20 24 13 144
   Missing Values 11 4 6 3 24
   mean 63.418 69.505 65.233 67.195 64.907
   SD 7.848 11.156 7.523 11.727 8.889
   Median 61.950 68.787 64.795 64.430 63.220
   Q1,Q3 58.100, 65.892 64.308, 71.417 60.700, 69.285 60.540, 68.960 59.020, 68.398
   Min 52.110 54.780 53.570 50.480 50.480
   Max 96.330 101.345 80.560 95.890 101.345
median_RMSSD_Time_slow epoch 0.224
   Count 90 19 26 13 148
   Missing Values 8 5 4 3 20
   mean 78.458 84.218 84.863 79.166 80.385
   SD 9.098 15.395 14.989 7.530 11.394
   Median 77.975 80.190 82.920 77.400 79.388
   Q1,Q3 71.345, 84.540 76.070, 88.485 73.539, 95.785 73.420, 84.980 72.803, 86.286
   Min 52.200 64.420 64.010 68.985 52.200
   Max 101.360 138.230 117.070 91.350 138.230
median_RMSSD_Time_total_epoch 0.764
   Count 98 24 30 16 168
   mean 64.060 65.017 62.848 65.641 64.131
   SD 10.438 17.527 9.990 10.044 11.516
   Median 62.395 63.505 62.873 66.955 63.110
   Q1,Q3 57.683, 70.236 55.435, 68.371 58.581, 68.760 60.663, 69.500 57.415, 69.545
   Min 46.620 46.790 46.380 47.840 46.380
   Max 98.730 138.230 83.040 88.090 138.230
median_RMSSD_Magnitude_fast epoch 0.368
   Count 83 22 24 14 143
   Missing Values 15 2 6 2 25
   mean 65.556 73.100 72.270 73.601 68.631
   SD 24.956 29.681 28.263 28.109 26.551
   Median 61.095 72.650 68.257 75.285 66.490
   Q1,Q3 47.792, 75.675 53.935, 84.433 52.824, 87.936 62.897, 79.865 49.400, 82.235
   Min 20.160 24.450 30.250 26.515 20.160
   Max 157.550 158.900 130.970 131.850 158.900
median_RMSSD_Magnitude_medium epoch 0.071
   Count 87 20 24 13 144
   Missing Values 11 4 6 3 24
   mean 56.492 70.329 55.683 52.408 57.910
   SD 23.552 29.318 20.858 21.607 24.152
   Median 49.800 63.540 47.740 47.980 51.140
   Q1,Q3 40.300, 63.960 50.250, 81.223 40.731, 73.131 36.505, 55.990 40.835, 67.465
   Min 28.345 29.715 27.860 30.220 27.860
   Max 149.150 151.905 98.165 112.990 151.905
median_RMSSD_Magnitude_slow epoch 0.117
   Count 90 19 26 13 148
   Missing Values 8 5 4 3 20
   mean 43.154 45.317 42.074 58.182 44.562
   SD 26.142 16.941 18.883 39.329 25.585
   Median 35.885 44.630 42.317 45.160 40.722
   Q1,Q3 26.372, 49.825 34.177, 48.555 29.869, 48.213 38.030, 50.540 28.337, 49.554
   Min 20.100 22.930 17.460 28.085 17.460
   Max 180.510 87.080 111.320 177.435 180.510
median_RMSSD_Magnitude_total_epoch 0.054
   Count 98 24 30 16 168
   mean 52.018 63.340 59.000 55.252 55.190
   SD 21.697 24.650 26.846 17.715 22.990
   Median 47.945 57.235 52.045 55.907 51.043
   Q1,Q3 37.714, 59.303 49.212, 75.714 42.013, 66.657 39.045, 67.170 39.865, 62.062
   Min 21.700 24.450 22.125 31.570 21.700
   Max 136.030 139.395 130.970 93.415 139.395
n_epochs_fast epoch 0.306
   Count 83 22 24 14 143
   Missing Values 15 2 6 2 25
   mean 5.578 5.273 5.125 3.571 5.259
   SD 4.370 3.706 4.297 2.875 4.141
   Median 5.000 4.500 4.500 2.000 5.000
   Q1,Q3 2.500, 7.000 3.000, 7.000 1.750, 6.250 1.000, 5.750 2.000, 7.000
   Min 1.000 1.000 1.000 1.000 1.000
   Max 27.000 15.000 17.000 8.000 27.000
n_epochs_medium epoch 0.042
   Count 87 20 24 13 144
   Missing Values 11 4 6 3 24
   mean 3.977 3.950 2.958 2.385 3.660
   SD 2.766 3.052 3.458 1.557 2.875
   Median 3.000 3.000 2.000 2.000 3.000
   Q1,Q3 2.000, 6.000 2.000, 6.000 1.000, 3.000 1.000, 3.000 1.750, 5.000
   Min 1.000 1.000 1.000 1.000 1.000
   Max 13.000 13.000 18.000 5.000 18.000
n_epochs_slow epoch 0.196
   Count 90 19 26 13 148
   Missing Values 8 5 4 3 20
   mean 5.078 4.053 4.077 4.615 4.730
   SD 3.219 3.135 3.475 3.380 3.267
   Median 4.000 3.000 3.000 4.000 4.000
   Q1,Q3 3.000, 7.000 1.500, 5.000 1.250, 5.500 2.000, 7.000 2.000, 6.250
   Min 1.000 1.000 1.000 1.000 1.000
   Max 15.000 12.000 15.000 11.000 15.000
library(FSA)

# Outcomes that get Dunn post-hoc comparisons between the levels of `groups`.
dunn_vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)

# Collect one result table per outcome in a pre-sized list and bind once at the
# end, instead of re-copying a growing data frame on every iteration.
dunn_tables <- vector("list", length(dunn_vars))
names(dunn_tables) <- dunn_vars

for (var in dunn_vars) {
  # Dunn's test with Bonferroni-adjusted p-values for this outcome
  dunn <- dunnTest(as.formula(paste(var, "~ groups")), data = all_data, method = "bonferroni")

  # Keep only the comparison label and the adjusted p-value, tagged by outcome
  dunn_df <- dunn$res %>%
    mutate(Variable = var) %>%
    select(Variable, Comparison, P.adj) %>%
    rename(P.adjusted = P.adj)

  dunn_tables[[var]] <- dunn_df
}

combined_results <- bind_rows(dunn_tables)

# Wide layout: one row per outcome, one column per pairwise comparison.
# P.adjusted becomes a character string here (rounded to 3 decimals).
# NOTE(review): the code that follows re-creates `combined_results` from
# scratch, so this wide table is only available until then.
combined_results <- combined_results %>%
  mutate(P.adjusted = format(round(P.adjusted, 3), nsmall = 3)) %>%
  pivot_wider(names_from = Comparison, values_from = P.adjusted)
# Outcomes that get Dunn post-hoc comparisons between the levels of `groups`.
dunn_vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)

# Collect one result table per outcome in a pre-sized list and bind once at the
# end, instead of re-copying a growing data frame on every iteration.
dunn_tables <- vector("list", length(dunn_vars))
names(dunn_tables) <- dunn_vars

for (var in dunn_vars) {
  # Dunn's test with Bonferroni-adjusted p-values for this outcome
  dunn <- dunnTest(as.formula(paste(var, "~ groups")), data = all_data, method = "bonferroni")

  # Keep only the comparison label and the adjusted p-value; the outcome name
  # goes in `response`, the key the plotting code joins on.
  dunn_df <- dunn$res %>%
    mutate(response = var) %>%
    select(response, Comparison, P.adj) %>%
    rename(P.adjusted = P.adj)

  dunn_tables[[var]] <- dunn_df
}

# Long table: one row per outcome x pairwise comparison, numeric P.adjusted.
combined_results <- bind_rows(dunn_tables)

# View the combined table
# combined_results
knitr::kable(combined_results, format = "html")
response Comparison P.adjusted
total_contractions_frequency Jack’s Data - normal_SBTT_abnormal_CASS 0.0000310
total_contractions_frequency Jack’s Data - normal_SBTT_normal_CASS 0.0597706
total_contractions_frequency normal_SBTT_abnormal_CASS - normal_SBTT_normal_CASS 0.4064831
total_contractions_frequency Jack’s Data - prolonged_SBTT 1.0000000
total_contractions_frequency normal_SBTT_abnormal_CASS - prolonged_SBTT 0.1932298
total_contractions_frequency normal_SBTT_normal_CASS - prolonged_SBTT 1.0000000
total_contraction_rate_over10 Jack’s Data - normal_SBTT_abnormal_CASS 0.0006668
total_contraction_rate_over10 Jack’s Data - normal_SBTT_normal_CASS 1.0000000
total_contraction_rate_over10 normal_SBTT_abnormal_CASS - normal_SBTT_normal_CASS 0.0923847
total_contraction_rate_over10 Jack’s Data - prolonged_SBTT 1.0000000
total_contraction_rate_over10 normal_SBTT_abnormal_CASS - prolonged_SBTT 0.0476134
total_contraction_rate_over10 normal_SBTT_normal_CASS - prolonged_SBTT 1.0000000
number_of_contractions_over26.7_per_hour Jack’s Data - normal_SBTT_abnormal_CASS 0.0008870
number_of_contractions_over26.7_per_hour Jack’s Data - normal_SBTT_normal_CASS 0.0842242
number_of_contractions_over26.7_per_hour normal_SBTT_abnormal_CASS - normal_SBTT_normal_CASS 1.0000000
number_of_contractions_over26.7_per_hour Jack’s Data - prolonged_SBTT 1.0000000
number_of_contractions_over26.7_per_hour normal_SBTT_abnormal_CASS - prolonged_SBTT 0.7461671
number_of_contractions_over26.7_per_hour normal_SBTT_normal_CASS - prolonged_SBTT 1.0000000
number_of_total_epochs_frequency Jack’s Data - normal_SBTT_abnormal_CASS 1.0000000
number_of_total_epochs_frequency Jack’s Data - normal_SBTT_normal_CASS 1.0000000
number_of_total_epochs_frequency normal_SBTT_abnormal_CASS - normal_SBTT_normal_CASS 1.0000000
number_of_total_epochs_frequency Jack’s Data - prolonged_SBTT 0.0003179
number_of_total_epochs_frequency normal_SBTT_abnormal_CASS - prolonged_SBTT 0.0189014
number_of_total_epochs_frequency normal_SBTT_normal_CASS - prolonged_SBTT 0.0475040
imputed_sum_of_amplitude Jack’s Data - normal_SBTT_abnormal_CASS 0.0004672
imputed_sum_of_amplitude Jack’s Data - normal_SBTT_normal_CASS 0.0007496
imputed_sum_of_amplitude normal_SBTT_abnormal_CASS - normal_SBTT_normal_CASS 1.0000000
imputed_sum_of_amplitude Jack’s Data - prolonged_SBTT 0.0000000
imputed_sum_of_amplitude normal_SBTT_abnormal_CASS - prolonged_SBTT 0.1850503
imputed_sum_of_amplitude normal_SBTT_normal_CASS - prolonged_SBTT 0.0606662
motility_index Jack’s Data - normal_SBTT_abnormal_CASS 0.5064354
motility_index Jack’s Data - normal_SBTT_normal_CASS 1.0000000
motility_index normal_SBTT_abnormal_CASS - normal_SBTT_normal_CASS 1.0000000
motility_index Jack’s Data - prolonged_SBTT 0.0174224
motility_index normal_SBTT_abnormal_CASS - prolonged_SBTT 1.0000000
motility_index normal_SBTT_normal_CASS - prolonged_SBTT 0.1456734

4.2.4 Bar Plot

library(ggsignif)

# Outcomes drawn in the bar plot (one facet per outcome).
vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)

# Median of every outcome within each non-missing group, reshaped to one row
# per group x outcome (columns: groups, response, median).
medians <- all_data %>%
  filter(!is.na(groups)) %>%
  group_by(groups) %>%
  summarise(
    across(all_of(vars), function(x) median(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  tidyr::pivot_longer(cols = -groups, names_to = "response", values_to = "median")

# Split "A - B" comparison labels into their two group names. All comparisons
# are kept here; filtering to significant ones happens in the next step.
combined_results <- combined_results %>%
  separate(col = Comparison, into = c("group1", "group2"), sep = "\\s*-\\s*")

# Significant comparisons only, with a numeric label and a star label.
dunn_sig <- combined_results %>%
  filter(P.adjusted < 0.05) %>%
  transmute(
    response,
    group1,
    group2,
    label = ifelse(P.adjusted < 0.001, "p<0.001", sprintf("p=%.3f", P.adjusted)),
    label_sig = case_when(
      P.adjusted < 0.0001 ~ "****",
      P.adjusted < 0.001  ~ "***",
      P.adjusted < 0.01   ~ "**",
      P.adjusted < 0.05   ~ "*",
      TRUE ~ "ns"
    )
  )

# Per-outcome vertical layout for the brackets: `pad` is the gap above the
# tallest bar, `step` the spacing between stacked brackets. When all medians
# are equal (range 0) both fall back to a fraction of max(|y_max|, 1).
y_stats <- medians %>%
  group_by(response) %>%
  summarise(
    y_max = max(median, na.rm = TRUE),
    y_min = min(median, na.rm = TRUE),
    y_rng = y_max - y_min,
    pad   = ifelse(y_rng == 0, 0.10 * pmax(abs(y_max), 1), 0.08 * y_rng),
    step  = ifelse(y_rng == 0, 0.08 * pmax(abs(y_max), 1), 0.10 * y_rng),
    .groups = "drop"
  )

# Stack the brackets of each outcome upwards in (group1, group2) order.
anno <- dunn_sig %>%
  left_join(y_stats, by = "response") %>%
  arrange(response, group1, group2) %>%
  group_by(response) %>%
  mutate(y_position = y_max + pad + (seq_len(n()) - 1) * step) %>%
  ungroup()

# Columns named the way geom_signif(manual = TRUE) expects them.
anno_plot <- anno %>%
  select(
    response,
    xmin = group1,
    xmax = group2,
    annotations = label,
    annotations_sig = label_sig,
    y_position
  )

# Give bars and bracket end points the same set of factor levels.
grp_levels <- sort(unique(medians$groups))
medians <- medians %>%
  mutate(groups = factor(groups, levels = grp_levels))
anno_plot <- anno_plot %>%
  mutate(
    xmin = factor(xmin, levels = grp_levels),
    xmax = factor(xmax, levels = grp_levels)
  )

# Display names for the raw `groups` codes (x axis and bracket end points).
# NOTE(review): the first key uses a straight apostrophe; confirm it matches
# the value actually stored in `groups`, otherwise that group recodes to NA.
group_display_labels <- c(
  "Jack's Data" = "Healthy Controls",
  "normal_SBTT_normal_CASS" = "Normal PWH",
  "normal_SBTT_abnormal_CASS" = "PWH with AN",
  "prolonged_SBTT" = "PWH with Prolonged SBTT"
)

# Facet titles for the raw outcome column names.
response_display_labels <- c(
  "total_contractions_frequency" = "Contractions per hour",
  "total_contraction_rate_over10" = "Fraction of time spent rapidly contracting",
  "number_of_contractions_over26.7_per_hour" = "High amplitude contractions per hour",
  "number_of_total_epochs_frequency" = "Rhythmic intervals per hour",
  "imputed_sum_of_amplitude" = "Sum of the amplitudes",
  "motility_index" = "Motility index"
)

# Translate raw codes to display names; codes missing from `lookup` become NA.
# The result is a plain character vector (any factor levels are dropped).
recode_display <- function(x, lookup) {
  unname(lookup[as.character(x)])
}

# Faceted bar chart of group medians with significance brackets.
#   bar_data       one row per group x outcome (columns groups, response, median)
#   bracket_data   one row per bracket (columns response, xmin, xmax,
#                  y_position, plus the annotation column named below)
#   annotation_col name of the `bracket_data` column whose text is drawn on
#                  the brackets
# Returns the ggplot object; calling it at top level prints the plot.
plot_median_bars <- function(bar_data, bracket_data, annotation_col) {
  bars <- bar_data %>%
    mutate(
      groups = recode_display(groups, group_display_labels),
      response = recode_display(response, response_display_labels)
    )

  brackets <- bracket_data %>%
    mutate(
      xmin = recode_display(xmin, group_display_labels),
      xmax = recode_display(xmax, group_display_labels),
      response = recode_display(response, response_display_labels)
    )
  # Put the chosen label text in the column mapped to `annotations` below
  brackets$annotations <- brackets[[annotation_col]]

  ggplot(bars, aes(x = groups, y = median, fill = groups)) +
    geom_col(width = 0.7) +
    geom_signif(
      data = brackets,
      manual = TRUE,        # required when brackets come from data columns
      inherit.aes = FALSE,  # do not inherit fill = groups etc. from the bars
      aes(
        xmin = xmin, xmax = xmax,
        annotations = annotations,
        y_position = y_position
      ),
      tip_length = 0.01,
      textsize = 3
    ) +
    facet_wrap(~ response, scales = "free_y") +
    scale_fill_brewer(palette = "Set2", name = "Group") +
    labs(x = NULL, y = "Median", title = "Medians with Dunn (Bonferroni)") +
    theme_minimal(base_size = 12) +
    theme(
      strip.text = element_text(size = 13, face = "bold"),
      panel.grid.minor = element_blank(),
      axis.text.x = element_text(angle = 20, hjust = 1, face = "bold"),
      axis.text.y = element_text(face = "bold")
    )
}

# Brackets labelled with the adjusted p-value text
plot_median_bars(medians, anno_plot, "annotations")

# Same plot with significance stars on the brackets
plot_median_bars(medians, anno_plot, "annotations_sig")

4.3 Table 3 - Removed

# Attach subject-level metadata to each cohort's 5-minute data. The key is
# `combined_ID` on the left and the cohort's own ID column on the right.
EVA_Controls_5min <- EVA_Controls_5min %>%
  left_join(Control_data_eva, by = c("combined_ID" = "SUBJID"))
EVA_Patients_5min <- EVA_Patients_5min %>%
  left_join(PWH_data, by = c("combined_ID" = "ID"))
Control_Patients_5min <- Control_Patients_5min %>%
  left_join(Control_data_jack, by = "combined_ID")
EVA_Controls_5min$source <- "EVA Controls"

# Stack the cohorts and keep the first row per subject. EVA_Controls_5min is
# deliberately left out of the stack. The grouping is dropped afterwards so
# later steps do not silently operate per subject.
all_data <- rbind(Control_Patients_5min, EVA_Patients_5min) %>% # , EVA_Controls_5min
  group_by(combined_ID) %>%
  slice(1) %>%
  ungroup()
# Settings shared by the arsenal::tableby() summaries: run a test per row
# ("kwt" for numeric variables, "chisq" for categorical ones), add a Total
# column and relabel the reported statistics.
mycontrols <- tableby.control(
  test = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  total = TRUE,
  numeric.test = "kwt",
  cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = "Count", Nmiss = "Missing Values", mean = "mean", sd = "SD",
    median = "Median", q1q3 = "Q1,Q3", min = "Min", max = "Max"
  )
)

# Summarise every recording-level and motility measure by data `source`.
tab.test <- tableby(
  source ~ total_time +
    total_no_missing_time +
    no_missing_time_ratio +
    total_number_of_contractions +
    sum_of_amplitude +
    total_number_of_contractions_over26.7 +
    total_contraction_rate_0 +
    total_contraction_rate_1_3 +
    total_contraction_rate_4_6 +
    total_contraction_rate_7_9 +
    total_contraction_rate_over10 +
    number_of_contractions_over26.7_per_hour +
    imputed_total_number_of_contractions +
    imputed_sum_of_amplitude +
    number_of_fast_epochs +
    number_of_medium_epochs +
    number_of_slow_epochs +
    number_of_total_epochs +
    number_of_fast_epochs_frequency +
    number_of_medium_epochs_frequency +
    number_of_slow_epochs_frequency +
    number_of_total_epochs_frequency +
    motility_index,
  data = all_data,
  control = mycontrols
)

summary(tab.test)
EVA Patient (N=71) Jack’s Data (N=98) Total (N=169) p value
total_time < 0.001
   Count 71 98 169
   mean 5.191 3.762 4.363
   SD 3.099 1.160 2.297
   Median 4.703 3.706 3.951
   Q1,Q3 3.716, 5.750 2.910, 4.409 3.245, 4.973
   Min 1.664 1.312 1.312
   Max 23.128 6.590 23.128
total_no_missing_time 0.226
   Count 71 98 169
   mean 3.393 3.518 3.466
   SD 1.579 1.044 1.293
   Median 3.063 3.428 3.338
   Q1,Q3 2.425, 4.372 2.870, 4.104 2.603, 4.170
   Min 1.010 1.307 1.010
   Max 8.811 6.212 8.811
no_missing_time_ratio < 0.001
   Count 71 98 169
   mean 0.716 0.942 0.847
   SD 0.241 0.084 0.202
   Median 0.766 0.971 0.941
   Q1,Q3 0.613, 0.897 0.939, 0.990 0.788, 0.981
   Min 0.103 0.442 0.103
   Max 0.997 1.000 1.000
total_number_of_contractions 0.012
   Count 71 98 169
   mean 973.676 786.857 865.343
   SD 477.890 392.159 438.755
   Median 885.000 734.000 780.000
   Q1,Q3 615.000, 1239.500 517.500, 951.750 554.000, 1073.000
   Min 252.000 174.000 174.000
   Max 2582.000 2517.000 2582.000
sum_of_amplitude 0.051
   Count 71 98 169
   mean 115979.438 100940.428 107258.592
   SD 49744.617 37378.788 43510.713
   Median 106323.294 96916.642 101003.974
   Q1,Q3 82820.367, 148375.073 76487.509, 125008.950 77963.451, 132296.825
   Min 34593.705 31958.930 31958.930
   Max 262383.401 224859.417 262383.401
total_number_of_contractions_over26.7 < 0.001
   Count 71 98 169
   mean 136.352 90.847 109.964
   SD 94.432 68.169 83.103
   Median 115.000 72.000 89.000
   Q1,Q3 59.000, 187.500 41.000, 127.500 47.000, 151.000
   Min 8.000 12.000 8.000
   Max 441.000 338.000 441.000
total_contraction_rate_0 0.493
   Count 71 98 169
   mean 0.237 0.247 0.243
   SD 0.168 0.153 0.159
   Median 0.216 0.222 0.218
   Q1,Q3 0.129, 0.293 0.122, 0.341 0.129, 0.326
   Min 0.012 0.000 0.000
   Max 0.970 0.685 0.970
total_contraction_rate_1_3 0.274
   Count 71 98 169
   mean 0.327 0.344 0.337
   SD 0.087 0.093 0.090
   Median 0.336 0.357 0.343
   Q1,Q3 0.268, 0.389 0.272, 0.411 0.272, 0.401
   Min 0.029 0.110 0.029
   Max 0.488 0.566 0.566
total_contraction_rate_4_6 0.866
   Count 71 98 169
   mean 0.188 0.187 0.188
   SD 0.065 0.071 0.068
   Median 0.189 0.192 0.189
   Q1,Q3 0.137, 0.227 0.133, 0.225 0.133, 0.226
   Min 0.000 0.041 0.000
   Max 0.366 0.364 0.366
total_contraction_rate_7_9 0.836
   Count 71 98 169
   mean 0.130 0.136 0.133
   SD 0.071 0.082 0.077
   Median 0.112 0.115 0.114
   Q1,Q3 0.085, 0.165 0.081, 0.192 0.082, 0.186
   Min 0.000 0.005 0.000
   Max 0.336 0.455 0.455
total_contraction_rate_over10 0.010
   Count 71 98 169
   mean 0.119 0.085 0.099
   SD 0.091 0.071 0.081
   Median 0.106 0.067 0.078
   Q1,Q3 0.048, 0.160 0.030, 0.125 0.038, 0.143
   Min 0.000 0.000 0.000
   Max 0.405 0.283 0.405
number_of_contractions_over26.7_per_hour < 0.001
   Count 71 98 169
   mean 46.195 27.688 35.463
   SD 35.738 23.529 30.590
   Median 38.169 20.876 25.165
   Q1,Q3 20.018, 59.233 11.757, 37.625 15.186, 47.064
   Min 4.460 3.119 3.119
   Max 168.976 155.521 168.976
imputed_total_number_of_contractions < 0.001
   Count 71 98 169
   mean 1561.901 842.918 1144.976
   SD 1075.089 398.189 836.553
   Median 1233.000 788.000 963.000
   Q1,Q3 955.000, 1826.000 577.000, 1006.000 673.000, 1336.000
   Min 332.000 204.000 204.000
   Max 6210.000 2526.000 6210.000
imputed_sum_of_amplitude < 0.001
   Count 71 98 169
   mean 187578.734 108176.988 141535.118
   SD 139333.716 38328.935 102383.712
   Median 158548.129 103445.589 119039.990
   Q1,Q3 115931.473, 200990.440 79719.797, 129270.590 89727.664, 160351.580
   Min 62203.157 34829.877 34829.877
   Max 950697.962 225832.269 950697.962
number_of_fast_epochs 0.361
   Count 71 98 169
   mean 4.113 4.724 4.467
   SD 3.871 4.497 4.244
   Median 3.000 4.000 4.000
   Q1,Q3 1.000, 6.000 1.000, 7.000 1.000, 7.000
   Min 0.000 0.000 0.000
   Max 17.000 27.000 27.000
number_of_medium_epochs 0.013
   Count 71 98 169
   mean 2.592 3.531 3.136
   SD 2.950 2.894 2.946
   Median 2.000 3.000 2.000
   Q1,Q3 1.000, 3.000 1.000, 5.000 1.000, 5.000
   Min 0.000 0.000 0.000
   Max 18.000 13.000 18.000
number_of_slow_epochs 0.006
   Count 71 98 169
   mean 3.437 4.663 4.148
   SD 3.379 3.386 3.427
   Median 3.000 4.000 3.000
   Q1,Q3 1.000, 5.000 2.000, 6.000 1.000, 6.000
   Min 0.000 0.000 0.000
   Max 15.000 15.000 15.000
number_of_total_epochs 0.011
   Count 71 98 169
   mean 10.141 12.918 11.751
   SD 7.769 8.196 8.113
   Median 9.000 12.000 10.000
   Q1,Q3 5.000, 14.000 7.000, 18.000 6.000, 17.000
   Min 1.000 1.000 1.000
   Max 37.000 51.000 51.000
number_of_fast_epochs_frequency 0.651
   Count 71 98 169
   mean 1.275 1.377 1.334
   SD 1.138 1.188 1.165
   Median 1.115 1.108 1.115
   Q1,Q3 0.395, 1.765 0.378, 2.274 0.384, 1.931
   Min 0.000 0.000 0.000
   Max 5.168 4.601 5.168
number_of_medium_epochs_frequency 0.027
   Count 71 98 169
   mean 0.729 1.019 0.897
   SD 0.654 0.837 0.777
   Median 0.622 0.847 0.743
   Q1,Q3 0.300, 0.994 0.339, 1.529 0.315, 1.268
   Min 0.000 0.000 0.000
   Max 3.240 4.302 4.302
number_of_slow_epochs_frequency 0.002
   Count 71 98 169
   mean 0.899 1.295 1.129
   SD 0.722 0.850 0.820
   Median 0.779 1.214 0.980
   Q1,Q3 0.366, 1.406 0.689, 1.832 0.497, 1.649
   Min 0.000 0.000 0.000
   Max 3.232 3.648 3.648
number_of_total_epochs_frequency 0.008
   Count 71 98 169
   mean 2.903 3.691 3.360
   SD 1.803 2.027 1.970
   Median 2.512 3.430 3.091
   Q1,Q3 1.689, 3.855 2.159, 4.985 1.779, 4.427
   Min 0.326 0.302 0.302
   Max 7.846 9.386 9.386
motility_index 0.023
   Count 71 98 169
   mean 18.329 18.001 18.139
   SD 0.928 0.873 0.908
   Median 18.397 18.086 18.147
   Q1,Q3 17.833, 19.057 17.521, 18.615 17.589, 18.737
   Min 15.981 15.750 15.750
   Max 20.278 20.154 20.278
# Per-subject RMSSD summaries split by epoch type, in wide form.
#   epoch_data    data with columns combined_ID, epoch_type, epoch_RMSSD_Time
#                 and epoch_RMSSD_Magnitude
#   source_label  value stored in the `source` column of the result
# Returns one row per combined_ID with columns "<statistic>_<epoch_type>" plus
# `source`. No na.rm is used, so an NA input value makes that summary NA.
# NOTE(review): a subject with no rows of a given epoch_type gets NA (not 0)
# in the matching n_epochs_* column after pivot_wider(); confirm that is the
# intended meaning before comparing epoch counts between groups.
summarise_rmssd_by_epoch_type <- function(epoch_data, source_label) {
  epoch_data %>%
    group_by(combined_ID, epoch_type) %>%
    summarize(
      mean_RMSSD_Time = mean(epoch_RMSSD_Time),
      median_RMSSD_Time = median(epoch_RMSSD_Time),
      sd_RMSSD_Time = sd(epoch_RMSSD_Time),
      mean_RMSSD_Magnitude = mean(epoch_RMSSD_Magnitude),
      median_RMSSD_Magnitude = median(epoch_RMSSD_Magnitude),
      sd_RMSSD_Magnitude = sd(epoch_RMSSD_Magnitude),
      n_epochs = n(),
      .groups = "drop"
    ) %>%
    pivot_wider(
      id_cols = combined_ID,
      names_from = epoch_type,
      values_from = c(
        mean_RMSSD_Time, median_RMSSD_Time, sd_RMSSD_Time,
        mean_RMSSD_Magnitude, median_RMSSD_Magnitude, sd_RMSSD_Magnitude,
        n_epochs
      )
    ) %>%
    mutate(source = source_label)
}

# Per-subject RMSSD medians over all rows, regardless of epoch type.
# Returns one row per combined_ID.
summarise_rmssd_overall <- function(epoch_data) {
  epoch_data %>%
    group_by(combined_ID) %>%
    summarize(
      median_RMSSD_Time_total_epoch = median(epoch_RMSSD_Time),
      median_RMSSD_Magnitude_total_epoch = median(epoch_RMSSD_Magnitude),
      .groups = "drop"
    )
}

# Historical controls
Control_Patients_5min_total_epoch <- summarise_rmssd_overall(Control_Patients_5min)
Control_Patients_5min_RMSSD <-
  summarise_rmssd_by_epoch_type(Control_Patients_5min, "Jack's Data") %>%
  left_join(Control_Patients_5min_total_epoch, by = "combined_ID")

# EVA patients
EVA_Patients_5min_total_epoch <- summarise_rmssd_overall(EVA_Patients_5min)
EVA_Patients_5min_RMSSD <-
  summarise_rmssd_by_epoch_type(EVA_Patients_5min, "EVA Data") %>%
  left_join(EVA_Patients_5min_total_epoch, by = "combined_ID")

# EVA controls
EVA_Controls_5min_total_epoch <- summarise_rmssd_overall(EVA_Controls_5min)
EVA_Controls_5min_RMSSD <-
  summarise_rmssd_by_epoch_type(EVA_Controls_5min, "EVA Controls") %>%
  left_join(EVA_Controls_5min_total_epoch, by = "combined_ID")

# Stack the three cohorts into one table for the comparison by `source`.
all_data_RMSSD <- rbind(Control_Patients_5min_RMSSD, EVA_Patients_5min_RMSSD, EVA_Controls_5min_RMSSD)
# Settings shared by the arsenal::tableby() summaries: run a test per row
# ("kwt" for numeric variables, "chisq" for categorical ones), add a Total
# column and relabel the reported statistics.
mycontrols <- tableby.control(
  test = TRUE, # spelled out: `T` is an ordinary variable that can be reassigned
  total = TRUE,
  numeric.test = "kwt",
  cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = "Count", Nmiss = "Missing Values", mean = "mean", sd = "SD",
    median = "Median", q1q3 = "Q1,Q3", min = "Min", max = "Max"
  )
)

# Summarise the per-subject RMSSD medians and epoch counts by data `source`.
# Column names containing a space (e.g. "fast epoch") must stay back-quoted.
tab.test <- tableby(
  source ~ `median_RMSSD_Time_fast epoch` +
    `median_RMSSD_Time_medium epoch` +
    `median_RMSSD_Time_slow epoch` +
    median_RMSSD_Time_total_epoch +
    `median_RMSSD_Magnitude_fast epoch` +
    `median_RMSSD_Magnitude_medium epoch` +
    `median_RMSSD_Magnitude_slow epoch` +
    median_RMSSD_Magnitude_total_epoch +
    `n_epochs_fast epoch` +
    `n_epochs_medium epoch` +
    `n_epochs_slow epoch`,
  data = all_data_RMSSD,
  control = mycontrols
)

summary(tab.test)
EVA Controls (N=12) EVA Data (N=71) Jack’s Data (N=98) Total (N=181) p value
median_RMSSD_Time_fast epoch 0.032
   Count 11 61 83 155
   Missing Values 1 10 15 26
   mean 52.868 53.136 50.465 51.687
   SD 6.292 6.481 4.285 5.518
   Median 53.760 52.540 49.280 50.290
   Q1,Q3 48.420, 56.660 48.160, 57.180 47.425, 53.165 47.672, 55.375
   Min 42.750 41.540 43.860 41.540
   Max 64.695 71.790 63.850 71.790
median_RMSSD_Time_medium epoch 0.016
   Count 8 58 87 153
   Missing Values 4 13 11 28
   mean 61.939 67.132 63.418 64.748
   SD 7.267 9.843 7.848 8.787
   Median 63.678 65.590 61.950 63.260
   Q1,Q3 57.229, 66.366 60.888, 70.885 58.100, 65.892 58.990, 68.070
   Min 49.140 50.480 52.110 49.140
   Max 71.835 101.345 96.330 101.345
median_RMSSD_Time_slow epoch 0.117
   Count 10 59 90 159
   Missing Values 2 12 8 22
   mean 83.365 83.876 78.458 80.777
   SD 15.330 14.217 9.098 11.886
   Median 80.938 80.715 77.975 79.930
   Q1,Q3 73.425, 86.995 73.755, 89.062 71.345, 84.540 72.765, 86.440
   Min 65.970 64.010 52.200 52.200
   Max 116.840 138.230 101.360 138.230
median_RMSSD_Time_total_epoch 0.992
   Count 12 71 98 181
   mean 63.672 64.233 64.060 64.102
   SD 9.922 12.860 10.438 11.361
   Median 64.442 63.615 62.395 63.260
   Q1,Q3 55.464, 69.134 57.370, 69.165 57.683, 70.236 57.280, 69.530
   Min 47.390 46.380 46.620 46.380
   Max 78.875 138.230 98.730 138.230
median_RMSSD_Magnitude_fast epoch 0.168
   Count 11 61 83 155
   Missing Values 1 10 15 26
   mean 84.860 73.035 65.556 69.869
   SD 48.989 28.061 24.956 28.711
   Median 69.730 72.370 61.095 66.850
   Q1,Q3 50.300, 107.558 54.360, 86.700 47.792, 75.675 49.400, 82.828
   Min 36.570 24.450 20.160 20.160
   Max 189.670 158.900 157.550 189.670
median_RMSSD_Magnitude_medium epoch 0.517
   Count 8 58 87 153
   Missing Values 4 13 11 28
   mean 55.849 60.919 56.492 58.137
   SD 21.074 25.692 23.552 24.220
   Median 53.620 53.445 49.800 51.210
   Q1,Q3 38.620, 72.134 42.785, 71.294 40.300, 63.960 40.820, 67.865
   Min 28.360 27.860 28.345 27.860
   Max 83.405 151.905 149.150 151.905
median_RMSSD_Magnitude_slow epoch 0.084
   Count 10 59 90 159
   Missing Values 2 12 8 22
   mean 49.098 47.339 43.154 45.081
   SD 24.307 24.965 26.142 25.543
   Median 41.740 43.950 35.885 40.860
   Q1,Q3 35.720, 53.392 34.177, 49.892 26.372, 49.825 28.727, 50.502
   Min 28.680 17.460 20.100 17.460
   Max 112.800 177.435 180.510 180.510
median_RMSSD_Magnitude_total_epoch 0.016
   Count 12 71 98 181
   mean 68.902 59.947 52.018 56.248
   SD 41.847 24.122 21.697 24.755
   Median 60.030 55.440 47.945 51.210
   Q1,Q3 45.650, 69.062 45.390, 71.448 37.714, 59.303 39.990, 65.010
   Min 33.395 22.125 21.700 21.700
   Max 189.670 139.395 136.030 189.670
n_epochs_fast epoch 0.131
   Count 11 61 83 155
   Missing Values 1 10 15 26
   mean 3.545 4.787 5.578 5.123
   SD 2.979 3.769 4.370 4.076
   Median 2.000 4.000 5.000 4.000
   Q1,Q3 1.000, 5.000 2.000, 7.000 2.500, 7.000 2.000, 7.000
   Min 1.000 1.000 1.000 1.000
   Max 10.000 17.000 27.000 27.000
n_epochs_medium epoch 0.070
   Count 8 58 87 153
   Missing Values 4 13 11 28
   mean 2.750 3.172 3.977 3.608
   SD 2.053 2.968 2.766 2.831
   Median 2.000 2.500 3.000 3.000
   Q1,Q3 1.750, 3.000 1.000, 4.000 2.000, 6.000 2.000, 5.000
   Min 1.000 1.000 1.000 1.000
   Max 6.000 18.000 13.000 18.000
n_epochs_slow epoch 0.088
   Count 10 59 90 159
   Missing Values 2 12 8 22
   mean 4.300 4.136 5.078 4.679
   SD 2.312 3.293 3.219 3.215
   Median 3.000 3.000 4.000 4.000
   Q1,Q3 3.000, 6.500 1.500, 5.500 3.000, 7.000 2.000, 6.500
   Min 1.000 1.000 1.000 1.000
   Max 8.000 15.000 15.000 15.000

4.4 Demographic Table

# Plot_conditioning() is defined elsewhere in this file; presumably it plots
# each listed variable conditioned on `source` -- confirm against its definition.
Plot_conditioning(
  all_data,
  c("Age", "Sex", "MCASS_total", "Autonomic_Neuropathy", "Prolonged_SBTT"),
  "source"
)

# Settings for the summary table: Kruskal-Wallis test for numeric variables,
# chi-square test for categorical ones, plus a Total column.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = "Count", Nmiss = "Missing Values", mean = "mean", sd = "SD",
    median = "Median", q1q3 = "Q1,Q3", min = "Min", max = "Max"
  )
)

# Demographic and clinical characteristics compared across `source`.
tab.test <- tableby(
  source ~ Age + Sex + MCASS_total + Autonomic_Neuropathy + Prolonged_SBTT,
  data = all_data, control = mycontrols
)

summary(tab.test)
EVA Patient (N=71) Jack’s Data (N=98) Total (N=169) p value
Age 0.004
   Count 71 97 168
   Missing Values 0 1 1
   mean 52.986 45.577 48.708
   SD 11.819 18.271 16.246
   Median 55.000 41.000 48.500
   Q1,Q3 44.000, 62.500 30.000, 65.000 36.500, 64.000
   Min 28.000 18.000 18.000
   Max 76.000 80.000 80.000
Sex 0.002
   Female 18 (25.4%) 48 (49.0%) 66 (39.1%)
   Male 53 (74.6%) 50 (51.0%) 103 (60.9%)
MCASS_total
   Mild AN 20 (28.2%) 0 20 (28.2%)
   Moderate AN 3 (4.2%) 0 3 (4.2%)
   Normal/Equivocal 48 (67.6%) 0 48 (67.6%)
Autonomic_Neuropathy
   No 40 (56.3%) 0 40 (56.3%)
   Yes 31 (43.7%) 0 31 (43.7%)
Prolonged_SBTT < 0.001
   No 54 (77.1%) 94 (95.9%) 148 (88.1%)
   Yes 16 (22.9%) 4 (4.1%) 20 (11.9%)
# Settings for the summary table: Kruskal-Wallis test for numeric variables,
# chi-square test for categorical ones, plus a Total column.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mycontrols <- tableby.control(
  test = TRUE, total = TRUE,
  numeric.test = "kwt", cat.test = "chisq",
  numeric.stats = c("N", "Nmiss", "mean", "sd", "median", "q1q3", "min", "max"),
  cat.stats = c("countpct"),
  stats.labels = list(
    N = "Count", Nmiss = "Missing Values", mean = "mean", sd = "SD",
    median = "Median", q1q3 = "Q1,Q3", min = "Min", max = "Max"
  )
)

# Same characteristics as the by-source table, now split by `groups`.
tab.test <- tableby(
  groups ~ Age + Sex + MCASS_total + Autonomic_Neuropathy + Prolonged_SBTT,
  data = all_data, control = mycontrols
)

summary(tab.test)
Jack’s Data (N=98) normal_SBTT_abnormal_CASS (N=24) normal_SBTT_normal_CASS (N=30) prolonged_SBTT (N=16) Total (N=168) p value
Age 0.028
   Count 97 24 30 16 167
   Missing Values 1 0 0 0 1
   mean 45.577 54.083 53.600 51.750 48.832
   SD 18.271 12.392 10.820 12.008 16.215
   Median 41.000 57.000 53.000 47.000 49.000
   Q1,Q3 30.000, 65.000 48.250, 64.000 46.250, 60.000 40.000, 63.000 37.000, 64.000
   Min 18.000 29.000 30.000 39.000 18.000
   Max 80.000 70.000 76.000 69.000 80.000
Sex 0.012
   Female 48 (49.0%) 7 (29.2%) 7 (23.3%) 3 (18.8%) 65 (38.7%)
   Male 50 (51.0%) 17 (70.8%) 23 (76.7%) 13 (81.2%) 103 (61.3%)
MCASS_total
   Mild AN 0 17 (70.8%) 0 (0.0%) 3 (18.8%) 20 (28.6%)
   Moderate AN 0 2 (8.3%) 0 (0.0%) 1 (6.2%) 3 (4.3%)
   Normal/Equivocal 0 5 (20.8%) 30 (100.0%) 12 (75.0%) 47 (67.1%)
Autonomic_Neuropathy
   No 0 0 (0.0%) 30 (100.0%) 9 (56.2%) 39 (55.7%)
   Yes 0 24 (100.0%) 0 (0.0%) 7 (43.8%) 31 (44.3%)
Prolonged_SBTT < 0.001
   No 94 (95.9%) 24 (100.0%) 30 (100.0%) 0 (0.0%) 148 (88.1%)
   Yes 4 (4.1%) 0 (0.0%) 0 (0.0%) 16 (100.0%) 20 (11.9%)
# Print the combined_ID of every participant in the "prolonged_SBTT" group.
all_data %>%
  filter(groups == "prolonged_SBTT") %>%
  pull(combined_ID)
##  [1] 7022 7027 7037 7042 7045 7053 7055 7057 7060 7071 7114 7121 7131 7144 7146
## [16] 7161
# Among the participants in the "prolonged_SBTT" group, print the CTT...35
# values that exceed 3540. The column is converted with as.numeric() before
# the comparison.
prolonged_sbtt_ids <- all_data %>%
  filter(groups == "prolonged_SBTT") %>%
  pull(combined_ID)

final_spreadsheet %>%
  filter(ID %in% prolonged_sbtt_ids) %>%
  filter(as.numeric(CTT...35) > 3540) %>%
  select(CTT...35)
## # A tibble: 6 × 1
##   CTT...35          
##   <chr>             
## 1 5493              
## 2 3702              
## 3 5285              
## 4 5201              
## 5 21729.599999999999
## 6 5147

5 Correlation Matrix

5.1 EVA Patients

# EVA_Patients_5min_RMSSD_total <- EVA_Patients_5min %>% 
#   group_by(combined_ID) %>%
#   summarize(
#     mean_RMSSD_Time_total = mean(epoch_RMSSD_Time),
#     median_RMSSD_Time_total = median(epoch_RMSSD_Time),
#     sd_RMSSD_Time_total = sd(epoch_RMSSD_Time),
#     mean_RMSSD_Magnitude_total = mean(epoch_RMSSD_Magnitude),
#     median_RMSSD_Magnitude_total = median(epoch_RMSSD_Magnitude),
#     sd_RMSSD_Magnitude_total = sd(epoch_RMSSD_Magnitude),
#     .groups = "drop"
#   ) 

library(ggcorrplot)

# res <- cor(data.frame(
#   mean_RMSSD_Time_total = EVA_Patients_5min_RMSSD_total$mean_RMSSD_Time_total, 
#   median_RMSSD_Time_total = EVA_Patients_5min_RMSSD_total$median_RMSSD_Time_total, 
#   mean_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_total$mean_RMSSD_Magnitude_total, 
#   median_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_total$median_RMSSD_Magnitude_total, 
#   `cass_baseline VM SBP` = EVA_Patients_5min_RMSSD_total$`cass_baseline VM SBP`, 
#   `cass_baseline VM DBP` = EVA_Patients_5min_RMSSD_total$`cass_baseline VM DBP`, 
#   `cass_Valsalva Ratio` = EVA_Patients_5min_RMSSD_total$`cass_Valsalva Ratio`, 
#   `cass_HRDB Ratio` = EVA_Patients_5min_RMSSD_total$`cass_HRDB Ratio`, 
#   `cass_adrenergic_index` = EVA_Patients_5min_RMSSD_total$`cass_adrenergic_index`, 
#   `cass_sudomotor_index` = EVA_Patients_5min_RMSSD_total$`cass_sudomotor_index`, 
#   `cass_cardiovagal_index` = EVA_Patients_5min_RMSSD_total$`cass_cardiovagal_index`, 
#   `cass_total` = EVA_Patients_5min_RMSSD_total$`cass_total`, 
#   `mcass_brsa` = EVA_Patients_5min_RMSSD_total$`mcass_brsa`, 
#   `mcass_brsv` = EVA_Patients_5min_RMSSD_total$`mcass_brsv`, 
#   `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_total$`mcass_adrenergic_index`, 
#   `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_total$`mcass_sudomotor_index`, 
#   `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_total$`mcass_cardiovagal_index`, 
#   `mcass_total` = EVA_Patients_5min_RMSSD_total$`mcass_total`
#   ), 
#   use = "complete.obs", method = "spearman")
# 
# # Select variables for x and y axes
# y_vars <- c("median_RMSSD_Time_total", "median_RMSSD_Magnitude_total")
# x_vars <- c("cass_baseline.VM.SBP", "cass_baseline.VM.DBP", "cass_Valsalva.Ratio", "cass_HRDB.Ratio", "cass_adrenergic_index",  "cass_sudomotor_index", "cass_cardiovagal_index", "cass_total", "mcass_brsa", "mcass_brsv", "mcass_adrenergic_index", "mcass_sudomotor_index", "mcass_cardiovagal_index", "mcass_total") 
# 
# # Subset the correlation matrix
# sub_cor <- res[y_vars, x_vars]
# 
# # Convert to long format and plot with ggplot2
library(reshape2)
# df_long <- melt(sub_cor)
# 
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# Attach the spreadsheet measurements to the per-participant RMSSD summaries;
# combined_ID in the RMSSD table is matched against ID in final_spreadsheet.
EVA_Patients_5min_RMSSD_joined <- EVA_Patients_5min_RMSSD %>%
  left_join(final_spreadsheet, by = c("combined_ID" = "ID"))

# RMSSD summary columns entering the correlation matrix.
rmssd_cols <- c(
  "median_RMSSD_Time_fast epoch",
  "median_RMSSD_Time_medium epoch",
  "median_RMSSD_Time_slow epoch",
  "median_RMSSD_Magnitude_fast epoch",
  "median_RMSSD_Magnitude_medium epoch",
  "median_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Magnitude_total_epoch"
)

# CASS / mCASS columns entering the correlation matrix.
autonomic_cols <- c(
  "cass_baseline VM SBP",
  "cass_baseline VM DBP",
  "cass_Valsalva Ratio",
  "cass_HRDB Ratio",
  "cass_adrenergic_index",
  "cass_sudomotor_index",
  "cass_cardiovagal_index",
  "cass_total",
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Pairwise Spearman correlations among all selected columns. data.frame()
# (check.names = TRUE by default) rewrites the spaces in the column names as
# dots, so the dimnames of `res` use the dotted spelling.
res <- cor(
  data.frame(EVA_Patients_5min_RMSSD_joined[c(rmssd_cols, autonomic_cols)]),
  use = "pairwise.complete.obs", method = "spearman"
)

# Select variables for x and y axes (dotted names, in display order)
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Magnitude_slow.epoch",
  "median_RMSSD_Magnitude_medium.epoch",
  "median_RMSSD_Magnitude_fast.epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
# make.names() applies the same space-to-dot conversion data.frame() used above.
x_vars <- make.names(autonomic_cols)

# Subset the correlation matrix: RMSSD measures in rows, CASS/mCASS in columns
sub_cor <- res[y_vars, x_vars]

# Convert to long format (Var1 = row name, Var2 = column name, value)
df_long <- melt(sub_cor)

# Heat map of the correlations; `limits` is spelled out in full instead of
# relying on partial matching of the abbreviated `limit`.
ggplot(df_long, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1), name = "Correlation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  coord_fixed()

5.2 Prolonged SBTT

# EVA_Patients_5min_RMSSD_prolonged_SBTT <- EVA_Patients_5min_RMSSD_total %>%
#   filter(groups == "prolonged_SBTT")


# res <- cor(data.frame(
#   mean_RMSSD_Time_total = EVA_Patients_5min_RMSSD_prolonged_SBTT$mean_RMSSD_Time_total, 
#   median_RMSSD_Time_total = EVA_Patients_5min_RMSSD_prolonged_SBTT$median_RMSSD_Time_total, 
#   mean_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_prolonged_SBTT$mean_RMSSD_Magnitude_total, 
#   median_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_prolonged_SBTT$median_RMSSD_Magnitude_total, 
#   `cass_baseline VM SBP` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_baseline VM SBP`, 
#   `cass_baseline VM DBP` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_baseline VM DBP`, 
#   `cass_Valsalva Ratio` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_Valsalva Ratio`, 
#   `cass_HRDB Ratio` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_HRDB Ratio`, 
#   `cass_adrenergic_index` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_adrenergic_index`, 
#   `cass_sudomotor_index` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_sudomotor_index`, 
#   `cass_cardiovagal_index` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_cardiovagal_index`, 
#   `cass_total` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`cass_total`, 
#   `mcass_brsa` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`mcass_brsa`, 
#   `mcass_brsv` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`mcass_brsv`, 
#   `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`mcass_adrenergic_index`, 
#   `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`mcass_sudomotor_index`, 
#   `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`mcass_cardiovagal_index`, 
#   `mcass_total` = EVA_Patients_5min_RMSSD_prolonged_SBTT$`mcass_total`
#   ), 
#   use = "complete.obs", method = "spearman")
# 
# # Select variables for x and y axes
# y_vars <- c( "median_RMSSD_Time_total", "median_RMSSD_Magnitude_total")
# x_vars <- c("cass_baseline.VM.SBP", "cass_baseline.VM.DBP", "cass_Valsalva.Ratio", "cass_HRDB.Ratio", "cass_adrenergic_index",  "cass_sudomotor_index", "cass_cardiovagal_index", "cass_total", "mcass_brsa", "mcass_brsv", "mcass_adrenergic_index", "mcass_sudomotor_index", "mcass_cardiovagal_index", "mcass_total") 
# 
# # Subset the correlation matrix
# sub_cor <- res[y_vars, x_vars]
# 
# # Convert to long format and plot with ggplot2
# df_long <- melt(sub_cor)
# 
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# Restrict the joined RMSSD / spreadsheet table to the "prolonged_SBTT" group.
EVA_Patients_5min_RMSSD_joined_prolonged_SBTT <- EVA_Patients_5min_RMSSD_joined %>%
  filter(groups == "prolonged_SBTT")

# RMSSD summary columns entering the correlation matrix. The mean_* columns
# are not plotted below; they are kept so `res` has the same contents as before.
rmssd_cols <- c(
  "mean_RMSSD_Time_fast epoch",
  "mean_RMSSD_Time_medium epoch",
  "mean_RMSSD_Time_slow epoch",
  "median_RMSSD_Time_fast epoch",
  "median_RMSSD_Time_medium epoch",
  "median_RMSSD_Time_slow epoch",
  "mean_RMSSD_Magnitude_fast epoch",
  "mean_RMSSD_Magnitude_medium epoch",
  "mean_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Magnitude_fast epoch",
  "median_RMSSD_Magnitude_medium epoch",
  "median_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Magnitude_total_epoch"
)

# CASS / mCASS columns entering the correlation matrix.
autonomic_cols <- c(
  "cass_baseline VM SBP",
  "cass_baseline VM DBP",
  "cass_Valsalva Ratio",
  "cass_HRDB Ratio",
  "cass_adrenergic_index",
  "cass_sudomotor_index",
  "cass_cardiovagal_index",
  "cass_total",
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Pairwise Spearman correlations among all selected columns. data.frame()
# (check.names = TRUE by default) rewrites the spaces in the column names as
# dots, so the dimnames of `res` use the dotted spelling.
res <- cor(
  data.frame(
    EVA_Patients_5min_RMSSD_joined_prolonged_SBTT[c(rmssd_cols, autonomic_cols)]
  ),
  use = "pairwise.complete.obs", method = "spearman"
)

# Select variables for x and y axes (dotted names, in display order)
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Magnitude_slow.epoch",
  "median_RMSSD_Magnitude_medium.epoch",
  "median_RMSSD_Magnitude_fast.epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
# make.names() applies the same space-to-dot conversion data.frame() used above.
x_vars <- make.names(autonomic_cols)

# Subset the correlation matrix: RMSSD measures in rows, CASS/mCASS in columns
sub_cor <- res[y_vars, x_vars]

# Convert to long format (Var1 = row name, Var2 = column name, value)
df_long <- melt(sub_cor)

# Heat map of the correlations; `limits` is spelled out in full instead of
# relying on partial matching of the abbreviated `limit`.
ggplot(df_long, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1), name = "Correlation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  coord_fixed()

5.3 normal_SBTT_normal_CASS

# EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS <- EVA_Patients_5min_RMSSD_total %>%
#   filter(groups == "normal_SBTT_normal_CASS")


# res <- cor(data.frame(
#   mean_RMSSD_Time_total = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$mean_RMSSD_Time_total, 
#   median_RMSSD_Time_total = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$median_RMSSD_Time_total, 
#   mean_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$mean_RMSSD_Magnitude_total, 
#   median_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$median_RMSSD_Magnitude_total, 
#   `cass_baseline VM SBP` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_baseline VM SBP`, 
#   `cass_baseline VM DBP` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_baseline VM DBP`, 
#   `cass_Valsalva Ratio` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_Valsalva Ratio`, 
#   `cass_HRDB Ratio` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_HRDB Ratio`, 
#   `cass_adrenergic_index` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_adrenergic_index`, 
#   `cass_sudomotor_index` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_sudomotor_index`, 
#   `cass_cardiovagal_index` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_cardiovagal_index`, 
#   `cass_total` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`cass_total`, 
#   `mcass_brsa` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`mcass_brsa`, 
#   `mcass_brsv` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`mcass_brsv`, 
#   `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`mcass_adrenergic_index`, 
#   `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`mcass_sudomotor_index`, 
#   `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`mcass_cardiovagal_index`, 
#   `mcass_total` = EVA_Patients_5min_RMSSD_normal_SBTT_normal_CASS$`mcass_total`
#   ), 
#   use = "complete.obs", method = "spearman")
# 
# # Select variables for x and y axes
# y_vars <- c("median_RMSSD_Time_total", "median_RMSSD_Magnitude_total")
# x_vars <- c("cass_baseline.VM.SBP", "cass_baseline.VM.DBP", "cass_Valsalva.Ratio", "cass_HRDB.Ratio", "cass_adrenergic_index",  "cass_sudomotor_index", "cass_cardiovagal_index", "cass_total", "mcass_brsa", "mcass_brsv", "mcass_adrenergic_index", "mcass_sudomotor_index", "mcass_cardiovagal_index", "mcass_total") 
# 
# # Subset the correlation matrix
# sub_cor <- res[y_vars, x_vars]
# 
# # Convert to long format and plot with ggplot2
# library(reshape2)
# df_long <- melt(sub_cor)
# 
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# Restrict the joined RMSSD / spreadsheet table to the
# "normal_SBTT_normal_CASS" group.
EVA_Patients_5min_RMSSD_joined_normal_SBTT_normal_CASS <- EVA_Patients_5min_RMSSD_joined %>%
  filter(groups == "normal_SBTT_normal_CASS")

# RMSSD summary columns entering the correlation matrix. The mean_* columns
# are not plotted below; they are kept so `res` has the same contents as before.
rmssd_cols <- c(
  "mean_RMSSD_Time_fast epoch",
  "mean_RMSSD_Time_medium epoch",
  "mean_RMSSD_Time_slow epoch",
  "median_RMSSD_Time_fast epoch",
  "median_RMSSD_Time_medium epoch",
  "median_RMSSD_Time_slow epoch",
  "mean_RMSSD_Magnitude_fast epoch",
  "mean_RMSSD_Magnitude_medium epoch",
  "mean_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Magnitude_fast epoch",
  "median_RMSSD_Magnitude_medium epoch",
  "median_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Magnitude_total_epoch"
)

# CASS / mCASS columns entering the correlation matrix.
autonomic_cols <- c(
  "cass_baseline VM SBP",
  "cass_baseline VM DBP",
  "cass_Valsalva Ratio",
  "cass_HRDB Ratio",
  "cass_adrenergic_index",
  "cass_sudomotor_index",
  "cass_cardiovagal_index",
  "cass_total",
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Pairwise Spearman correlations among all selected columns. data.frame()
# (check.names = TRUE by default) rewrites the spaces in the column names as
# dots, so the dimnames of `res` use the dotted spelling.
res <- cor(
  data.frame(
    EVA_Patients_5min_RMSSD_joined_normal_SBTT_normal_CASS[
      c(rmssd_cols, autonomic_cols)
    ]
  ),
  use = "pairwise.complete.obs", method = "spearman"
)

# Select variables for x and y axes (dotted names, in display order)
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Magnitude_slow.epoch",
  "median_RMSSD_Magnitude_medium.epoch",
  "median_RMSSD_Magnitude_fast.epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
# make.names() applies the same space-to-dot conversion data.frame() used above.
x_vars <- make.names(autonomic_cols)

# Subset the correlation matrix: RMSSD measures in rows, CASS/mCASS in columns
sub_cor <- res[y_vars, x_vars]

# Convert to long format (Var1 = row name, Var2 = column name, value)
df_long <- melt(sub_cor)

# Heat map of the correlations; `limits` is spelled out in full instead of
# relying on partial matching of the abbreviated `limit`.
ggplot(df_long, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1), name = "Correlation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  coord_fixed()

5.4 normal_SBTT_abnormal_CASS

# EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS <- EVA_Patients_5min_RMSSD_total %>%
#   filter(groups == "normal_SBTT_abnormal_CASS")

# res <- cor(data.frame(
#   mean_RMSSD_Time_total = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$mean_RMSSD_Time_total, 
#   median_RMSSD_Time_total = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$median_RMSSD_Time_total, 
#   mean_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$mean_RMSSD_Magnitude_total, 
#   median_RMSSD_Magnitude_total = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$median_RMSSD_Magnitude_total, 
#   `cass_baseline VM SBP` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_baseline VM SBP`, 
#   `cass_baseline VM DBP` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_baseline VM DBP`, 
#   `cass_Valsalva Ratio` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_Valsalva Ratio`, 
#   `cass_HRDB Ratio` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_HRDB Ratio`, 
#   `cass_adrenergic_index` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_adrenergic_index`, 
#   `cass_sudomotor_index` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_sudomotor_index`, 
#   `cass_cardiovagal_index` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_cardiovagal_index`, 
#   `cass_total` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`cass_total`, 
#   `mcass_brsa` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`mcass_brsa`, 
#   `mcass_brsv` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`mcass_brsv`, 
#   `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`mcass_adrenergic_index`, 
#   `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`mcass_sudomotor_index`, 
#   `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`mcass_cardiovagal_index`, 
#   `mcass_total` = EVA_Patients_5min_RMSSD_normal_SBTT_abnormal_CASS$`mcass_total`
#   ), 
#   use = "complete.obs", method = "spearman")
# 
# # Select variables for x and y axes
# y_vars <- c("median_RMSSD_Time_total", "median_RMSSD_Magnitude_total")
# x_vars <- c("cass_baseline.VM.SBP", "cass_baseline.VM.DBP", "cass_Valsalva.Ratio", "cass_HRDB.Ratio", "cass_adrenergic_index",  "cass_sudomotor_index", "cass_cardiovagal_index", "cass_total", "mcass_brsa", "mcass_brsv", "mcass_adrenergic_index", "mcass_sudomotor_index", "mcass_cardiovagal_index", "mcass_total") 
# 
# # Subset the correlation matrix
# sub_cor <- res[y_vars, x_vars]
# 
# # Convert to long format and plot with ggplot2
# library(reshape2)
# df_long <- melt(sub_cor)
# 
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# Restrict the joined RMSSD / spreadsheet table to the
# "normal_SBTT_abnormal_CASS" group.
EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS <-
  EVA_Patients_5min_RMSSD_joined %>%
  filter(groups == "normal_SBTT_abnormal_CASS")
res <- cor(data.frame(
  `mean_RMSSD_Time_fast epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mean_RMSSD_Time_fast epoch`, 
  `mean_RMSSD_Time_medium epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mean_RMSSD_Time_medium epoch`, 
  `mean_RMSSD_Time_slow epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mean_RMSSD_Time_slow epoch`, 
  `median_RMSSD_Time_fast epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`median_RMSSD_Time_fast epoch`, 
  `median_RMSSD_Time_medium epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`median_RMSSD_Time_medium epoch`, 
  `median_RMSSD_Time_slow epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`median_RMSSD_Time_slow epoch`, 
  `mean_RMSSD_Magnitude_fast epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mean_RMSSD_Magnitude_fast epoch`, 
  `mean_RMSSD_Magnitude_medium epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mean_RMSSD_Magnitude_medium epoch`, 
  `mean_RMSSD_Magnitude_slow epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mean_RMSSD_Magnitude_slow epoch`, 
  `median_RMSSD_Magnitude_fast epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`median_RMSSD_Magnitude_fast epoch`, 
  `median_RMSSD_Magnitude_medium epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`median_RMSSD_Magnitude_medium epoch`, 
  `median_RMSSD_Magnitude_slow epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`median_RMSSD_Magnitude_slow epoch`,  
  `median_RMSSD_Time_total_epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$median_RMSSD_Time_total_epoch,
  `median_RMSSD_Magnitude_total_epoch` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$median_RMSSD_Magnitude_total_epoch,
  `cass_baseline VM SBP` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_baseline VM SBP`, 
  `cass_baseline VM DBP` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_baseline VM DBP`, 
  `cass_Valsalva Ratio` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_Valsalva Ratio`, 
  `cass_HRDB Ratio` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_HRDB Ratio`, 
  `cass_adrenergic_index` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_adrenergic_index`, 
  `cass_sudomotor_index` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_sudomotor_index`, 
  `cass_cardiovagal_index` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_cardiovagal_index`, 
  `cass_total` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`cass_total`, 
  `mcass_brsa` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mcass_brsa`, 
  `mcass_brsv` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mcass_brsv`, 
  `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mcass_adrenergic_index`, 
  `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mcass_sudomotor_index`, 
  `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mcass_cardiovagal_index`, 
  `mcass_total` = EVA_Patients_5min_RMSSD_joined_normal_SBTT_abnormal_CASS$`mcass_total`
  ), 
  use = "pairwise.complete.obs", method = "spearman")

# Select variables for x and y axes
y_vars <- c("median_RMSSD_Magnitude_total_epoch", "median_RMSSD_Magnitude_slow.epoch", "median_RMSSD_Magnitude_medium.epoch", "median_RMSSD_Magnitude_fast.epoch", "median_RMSSD_Time_total_epoch", "median_RMSSD_Time_slow.epoch", "median_RMSSD_Time_medium.epoch", "median_RMSSD_Time_fast.epoch")
x_vars <- c("cass_baseline.VM.SBP", "cass_baseline.VM.DBP", "cass_Valsalva.Ratio", "cass_HRDB.Ratio", "cass_adrenergic_index",  "cass_sudomotor_index", "cass_cardiovagal_index", "cass_total", "mcass_brsa", "mcass_brsv", "mcass_adrenergic_index", "mcass_sudomotor_index", "mcass_cardiovagal_index", "mcass_total") 

# Subset the correlation matrix
sub_cor <- res[y_vars, x_vars]

# Convert to long format and plot with ggplot2
df_long <- melt(sub_cor)

ggplot(df_long, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limit = c(-1, 1), name = "Correlation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  coord_fixed()

5.5 EVA Controls

# EVA_Controls_5min_RMSSD_total <- EVA_Controls_5min %>% 
#   group_by(combined_ID) %>%
#   summarize(
#     mean_RMSSD_Time_total = mean(epoch_RMSSD_Time),
#     median_RMSSD_Time_total = median(epoch_RMSSD_Time),
#     sd_RMSSD_Time_total = sd(epoch_RMSSD_Time),
#     mean_RMSSD_Magnitude_total = mean(epoch_RMSSD_Magnitude),
#     median_RMSSD_Magnitude_total = median(epoch_RMSSD_Magnitude),
#     sd_RMSSD_Magnitude_total = sd(epoch_RMSSD_Magnitude),
#     .groups = "drop"
#   ) 
# 
# EVA_Controls_5min_RMSSD_total <- EVA_Controls_5min_RMSSD_total %>% 
#   left_join(final_spreadsheet_control, by = c("combined_ID" = "SUBJID"))


# res <- cor(data.frame(
#   mean_RMSSD_Time_total = EVA_Controls_5min_RMSSD_total$mean_RMSSD_Time_total, 
#   median_RMSSD_Time_total = EVA_Controls_5min_RMSSD_total$median_RMSSD_Time_total, 
#   mean_RMSSD_Magnitude_total = EVA_Controls_5min_RMSSD_total$mean_RMSSD_Magnitude_total, 
#   median_RMSSD_Magnitude_total = EVA_Controls_5min_RMSSD_total$median_RMSSD_Magnitude_total, 
#   `cass_baseline SBP` = EVA_Controls_5min_RMSSD_total$`cass_baseline SBP`, 
#   `cass_baseline DBP` = EVA_Controls_5min_RMSSD_total$`cass_baseline DBP`, 
#   `cass_Valsalva Ratio` = EVA_Controls_5min_RMSSD_total$`cass_valsalva ratio`, 
#   `cass_HRDB Ratio` = EVA_Controls_5min_RMSSD_total$`cass_HRDB ratio`, 
#   `cass_adrenergic_index` = EVA_Controls_5min_RMSSD_total$`cass_adrenergic_index`, 
#   `cass_sudomotor_index` = EVA_Controls_5min_RMSSD_total$`cass_sudomotor_index`, 
#   `cass_cardiovagal_index` = EVA_Controls_5min_RMSSD_total$`cass_cardiovagal_index`, 
#   `cass_total` = EVA_Controls_5min_RMSSD_total$`cass_total`, 
#   `mcass_brsa` = EVA_Controls_5min_RMSSD_total$`mcass_brsa`, 
#   `mcass_brsv` = EVA_Controls_5min_RMSSD_total$`mcass_brsv`, 
#   `mcass_adrenergic_index` = EVA_Controls_5min_RMSSD_total$`mcass_adrenergic_index`, 
#   `mcass_sudomotor_index` = EVA_Controls_5min_RMSSD_total$`mcass_sudomotor_index`, 
#   `mcass_cardiovagal_index` = EVA_Controls_5min_RMSSD_total$`mcass_cardiovagal_index`, 
#   `mcass_total` = EVA_Controls_5min_RMSSD_total$`mcass_total`
#   ), 
#   use = "complete.obs", method = "spearman")
# 
# # Select variables for x and y axes
# y_vars <- c("median_RMSSD_Time_total", "median_RMSSD_Magnitude_total")
# x_vars <- c("cass_baseline.SBP", "cass_baseline.DBP", "cass_Valsalva.Ratio", "cass_HRDB.Ratio", "cass_adrenergic_index",  "cass_sudomotor_index", "cass_cardiovagal_index", "cass_total", "mcass_brsa", "mcass_brsv", "mcass_adrenergic_index", "mcass_sudomotor_index", "mcass_cardiovagal_index", "mcass_total") 
# 
# # Subset the correlation matrix
# sub_cor <- res[y_vars, x_vars]
# 
# # Convert to long format and plot with ggplot2
# df_long <- melt(sub_cor)
# 
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# Spearman correlation heatmap for EVA controls: RMSSD epoch summaries (rows)
# against CASS / mCASS measures (columns).
EVA_Controls_5min_RMSSD_joined <- left_join(
  EVA_Controls_5min_RMSSD,
  final_spreadsheet_control,
  by = c("combined_ID" = "SUBJID")
)

# Pick the analysis columns by name (in matrix order), re-case the two ratio
# columns so they match the patient-side spelling, then let data.frame()
# turn the spaces in the names into dots before correlating.
res <- EVA_Controls_5min_RMSSD_joined[c(
  "mean_RMSSD_Time_fast epoch",
  "mean_RMSSD_Time_medium epoch",
  "mean_RMSSD_Time_slow epoch",
  "median_RMSSD_Time_fast epoch",
  "median_RMSSD_Time_medium epoch",
  "median_RMSSD_Time_slow epoch",
  "mean_RMSSD_Magnitude_fast epoch",
  "mean_RMSSD_Magnitude_medium epoch",
  "mean_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Magnitude_fast epoch",
  "median_RMSSD_Magnitude_medium epoch",
  "median_RMSSD_Magnitude_slow epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Magnitude_total_epoch",
  "cass_baseline SBP",
  "cass_baseline DBP",
  "cass_valsalva ratio",
  "cass_HRDB ratio",
  "cass_adrenergic_index",
  "cass_sudomotor_index",
  "cass_cardiovagal_index",
  "cass_total",
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)] %>%
  rename(
    `cass_Valsalva Ratio` = `cass_valsalva ratio`,
    `cass_HRDB Ratio` = `cass_HRDB ratio`
  ) %>%
  data.frame() %>%
  cor(use = "pairwise.complete.obs", method = "spearman")

# Rows (y) and columns (x) of the rectangular block to display
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Magnitude_slow.epoch",
  "median_RMSSD_Magnitude_medium.epoch",
  "median_RMSSD_Magnitude_fast.epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
x_vars <- c(
  "cass_baseline.SBP", "cass_baseline.DBP",
  "cass_Valsalva.Ratio", "cass_HRDB.Ratio",
  "cass_adrenergic_index", "cass_sudomotor_index",
  "cass_cardiovagal_index", "cass_total",
  "mcass_brsa", "mcass_brsv",
  "mcass_adrenergic_index", "mcass_sudomotor_index",
  "mcass_cardiovagal_index", "mcass_total"
)

# Keep only the y-by-x block of the full correlation matrix
sub_cor <- res[y_vars, x_vars]

# Long format (Var1 = row name, Var2 = column name, value = rho) for ggplot2
df_long <- melt(sub_cor)

ggplot(df_long, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

5.6 New Correlation Matrix 1

# One row per participant: keep the first row of each combined_ID, drop the
# columns that the spreadsheet join is about to supply again, then attach the
# spreadsheet variables.
# NOTE(review): there is no ungroup(), so the result stays grouped by
# combined_ID - confirm downstream code expects that.
EVA_Patients_5min_general <- EVA_Patients_5min %>%
  group_by(combined_ID) %>%
  slice(1) %>%
  select(-Age, -Sex, -SBTT...34, -groups) %>%
  left_join(final_spreadsheet, by = c("combined_ID" = "ID"))

# Spearman correlations between the motility summaries and the CASS / mCASS
# measures. Columns are picked by name in matrix order; data.frame() rewrites
# the spaces in the CASS names as dots.
res <- cor(
  data.frame(
    EVA_Patients_5min_general[c(
      "total_contractions_frequency",
      "total_contraction_rate_over10",
      "number_of_contractions_over26.7_per_hour",
      "number_of_total_epochs_frequency",
      "imputed_sum_of_amplitude",
      "motility_index",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )]
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Variables for the two heatmap axes
y_vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)
x_vars <- c(
  "cass_adrenergic_index", "cass_sudomotor_index",
  "mcass_adrenergic_index", "mcass_sudomotor_index"
)

# Here the CASS indices (x_vars) are the matrix rows and the motility
# measures (y_vars) the columns.
sub_cor <- res[x_vars, y_vars]

# Long format (Var1 = row name, Var2 = column name, value = rho) for ggplot2
df_long <- melt(sub_cor)

ggplot(df_long, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Spearman correlations, all patients: median RMSSD epoch summaries against
# CASS / mCASS measures. Columns are picked by name in matrix order;
# data.frame() rewrites the spaces in the names as dots.
res <- cor(
  data.frame(
    EVA_Patients_5min_RMSSD_joined[c(
      "median_RMSSD_Time_fast epoch",
      "median_RMSSD_Time_medium epoch",
      "median_RMSSD_Time_slow epoch",
      "median_RMSSD_Magnitude_fast epoch",
      "median_RMSSD_Magnitude_medium epoch",
      "median_RMSSD_Magnitude_slow epoch",
      "median_RMSSD_Time_total_epoch",
      "median_RMSSD_Magnitude_total_epoch",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )]
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Variables for the two heatmap axes
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Magnitude_slow.epoch",
  "median_RMSSD_Magnitude_medium.epoch",
  "median_RMSSD_Magnitude_fast.epoch",
  "median_RMSSD_Time_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
x_vars <- c(
  "cass_adrenergic_index", "cass_sudomotor_index",
  "mcass_adrenergic_index", "mcass_sudomotor_index"
)

# Here the CASS indices (x_vars) are the matrix rows and the RMSSD summaries
# (y_vars) the columns.
sub_cor <- res[x_vars, y_vars]

# Long format (Var1 = row name, Var2 = column name, value = rho) for ggplot2
df_long <- melt(sub_cor)

ggplot(df_long, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 3) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

5.7 New Correlation Matrix 2

# Same motility-vs-CASS analysis, leaving out the
# "normal_SBTT_normal_CASS" group. Rows whose `groups` is NA are dropped too,
# because filter() only keeps rows where the condition is TRUE.
EVA_Patients_5min_general_excluding_normal <- filter(
  EVA_Patients_5min_general,
  groups != "normal_SBTT_normal_CASS"
)

# Columns are picked by name in matrix order; data.frame() rewrites the
# spaces in the CASS names as dots, matching x_vars below.
res <- cor(
  data.frame(
    EVA_Patients_5min_general_excluding_normal[c(
      "total_contractions_frequency",
      "total_contraction_rate_over10",
      "number_of_contractions_over26.7_per_hour",
      "number_of_total_epochs_frequency",
      "imputed_sum_of_amplitude",
      "motility_index",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )]
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Variables for the two heatmap axes
y_vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)
x_vars <- c(
  "cass_baseline.VM.SBP", "cass_baseline.VM.DBP",
  "cass_Valsalva.Ratio", "cass_HRDB.Ratio",
  "cass_adrenergic_index", "cass_sudomotor_index",
  "cass_cardiovagal_index", "cass_total",
  "mcass_brsa", "mcass_brsv",
  "mcass_adrenergic_index", "mcass_sudomotor_index",
  "mcass_cardiovagal_index", "mcass_total"
)

# Motility measures as rows, CASS / mCASS measures as columns
sub_cor <- res[y_vars, x_vars]

# Long format (Var1 = row name, Var2 = column name, value = rho)
df_long <- melt(sub_cor)

# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# --- 1) helper: rectangular p-value matrix ---
#' Correlation-test p-values for every (y, x) pair of columns
#'
#' For each pair, the rows where both values are present are passed to
#' `cor.test()` and its p-value is stored. Pairs with fewer than 3 complete
#' observations stay `NA`.
#'
#' @param df Data frame holding every column named in `y_vars` and `x_vars`.
#' @param y_vars Character vector of column names; they label the rows of
#'   the result.
#' @param x_vars Character vector of column names; they label the columns of
#'   the result.
#' @param method Correlation method handed to `cor.test()`.
#' @param adjust `"none"` (default) returns raw p-values; `"BH"` applies
#'   `p.adjust()` across all cells of the matrix at once.
#' @return Numeric matrix, `length(y_vars)` x `length(x_vars)`, with
#'   `dimnames = list(y_vars, x_vars)`.
get_pmat_rect <- function(df, y_vars, x_vars, method = "spearman", adjust = c("none","BH")) {
  adjust <- match.arg(adjust)

  # Fail early and clearly on a misspelled / absent column. Without this,
  # df[[name]] silently yields NULL and the failure surfaces later as an
  # unrelated-looking error.
  missing_vars <- setdiff(c(y_vars, x_vars), names(df))
  if (length(missing_vars) > 0) {
    stop(
      "Columns not found in `df`: ", paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  p <- matrix(NA_real_, nrow = length(y_vars), ncol = length(x_vars),
              dimnames = list(y_vars, x_vars))
  for (i in seq_along(y_vars)) {
    y <- df[[y_vars[i]]]  # invariant across the inner loop
    for (j in seq_along(x_vars)) {
      x <- df[[x_vars[j]]]
      ok <- stats::complete.cases(x, y)
      if (sum(ok) >= 3) {
        # Warnings from cor.test() are deliberately silenced so that a grid of
        # tests does not flood the console; only the p-value is kept.
        p[i, j] <- suppressWarnings(
          stats::cor.test(x[ok], y[ok], method = method)$p.value
        )
      }
    }
  }
  if (adjust != "none") {
    # Adjust over the whole grid; p[] <- keeps the dimensions and dimnames.
    p[] <- stats::p.adjust(as.vector(p), method = adjust)
  }
  p
}

# rectangular correlation submatrix
# The motility + CASS / mCASS analysis frame is built ONCE and shared by the
# correlation matrix and the p-value matrix, so r and p are guaranteed to be
# computed from the same columns. data.frame() (check.names = TRUE) rewrites
# the spaces in the CASS names as dots, which is the spelling x_vars uses.
motility_cass_df <- data.frame(
  EVA_Patients_5min_general_excluding_normal[c(
    "total_contractions_frequency",
    "total_contraction_rate_over10",
    "number_of_contractions_over26.7_per_hour",
    "number_of_total_epochs_frequency",
    "imputed_sum_of_amplitude",
    "motility_index",
    "cass_baseline VM SBP",
    "cass_baseline VM DBP",
    "cass_Valsalva Ratio",
    "cass_HRDB Ratio",
    "cass_adrenergic_index",
    "cass_sudomotor_index",
    "cass_cardiovagal_index",
    "cass_total",
    "mcass_brsa",
    "mcass_brsv",
    "mcass_adrenergic_index",
    "mcass_sudomotor_index",
    "mcass_cardiovagal_index",
    "mcass_total"
  )]
)

cor_mat <- cor(motility_cass_df, use = "pairwise.complete.obs", method = "spearman")
# Subset the matrix computed just above. (This used to index the older global
# `res`, which left `cor_mat` unused and made the plot depend on `res` not
# having been overwritten in between.)
sub_cor <- cor_mat[y_vars, x_vars]

# rectangular p-value matrix (choose method = "pearson"/"spearman"; adjust = "BH" if you want)
p_sub <- get_pmat_rect(motility_cass_df, y_vars, x_vars,
                       method = "spearman", adjust = "none")

# long data for plotting
df_cor <- melt(sub_cor, varnames = c("Y","X"), value.name = "r")
df_p   <- melt(p_sub,   varnames = c("Y","X"), value.name = "p")

# flag significance (e.g., alpha = 0.05)
alpha <- 0.05
df_plot <- merge(df_cor, df_p, by = c("X","Y"))
df_plot$sig <- !is.na(df_plot$p) & df_plot$p < alpha

# --- 3) Plot A: correlation heatmap with numbers & significance marks ---
p_corr <- ggplot(df_plot, aes(X, Y, fill = r)) +
  geom_tile(color = "white") +
  geom_text(aes(label = sprintf("%.2f", r)), size = 3) +
  # add a star for significant cells
  # geom_text(data = subset(df_plot, sig), label = "*", vjust = -0.9, size = 4) +
  scale_fill_gradient2(low = "red", mid = "white", high = "blue",
                       midpoint = 0, limits = c(-1, 1), name = "Correlation") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_corr

# --- 4) Plot B: p-value heatmap with numbers (0–1 scale) ---
# A sequential two-colour scale is used here. The diverging
# scale_fill_gradient2() centres on midpoint = 0 by default, so on a 0-1
# p-value range its `low` colour is never reached and small p-values were
# drawn white instead of red.
p_pval <- ggplot(df_plot, aes(X, Y, fill = p)) +
  geom_tile(color = "white") +
  geom_text(aes(label = ifelse(is.na(p), "", sprintf("%.3f", p))), size = 3) +
  scale_fill_gradient(low = "red", high = "yellow",
                      limits = c(0, 1), name = "p-value") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_pval

# RMSSD-vs-CASS analysis, leaving out the "normal_SBTT_normal_CASS" group.
# Rows whose `groups` is NA are dropped too, because filter() only keeps rows
# where the condition is TRUE.
EVA_Patients_5min_RMSSD_joined_excluding_normal <- filter(
  EVA_Patients_5min_RMSSD_joined,
  groups != "normal_SBTT_normal_CASS"
)

# Columns are picked by name in matrix order; data.frame() rewrites the
# spaces in the names as dots, matching y_vars / x_vars below.
res <- cor(
  data.frame(
    EVA_Patients_5min_RMSSD_joined_excluding_normal[c(
      "median_RMSSD_Time_fast epoch",
      "median_RMSSD_Time_medium epoch",
      "median_RMSSD_Time_slow epoch",
      "median_RMSSD_Magnitude_fast epoch",
      "median_RMSSD_Magnitude_medium epoch",
      "median_RMSSD_Magnitude_slow epoch",
      "median_RMSSD_Time_total_epoch",
      "median_RMSSD_Magnitude_total_epoch",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )]
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Variables for the two heatmap axes.
# NOTE(review): the rows pair Magnitude_total with the three Time_* epochs -
# confirm that "median_RMSSD_Time_total_epoch" was not the intended first row.
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
x_vars <- c(
  "cass_baseline.VM.SBP", "cass_baseline.VM.DBP",
  "cass_Valsalva.Ratio", "cass_HRDB.Ratio",
  "cass_adrenergic_index", "cass_sudomotor_index",
  "cass_cardiovagal_index", "cass_total",
  "mcass_brsa", "mcass_brsv",
  "mcass_adrenergic_index", "mcass_sudomotor_index",
  "mcass_cardiovagal_index", "mcass_total"
)

# RMSSD summaries as rows, CASS / mCASS measures as columns
sub_cor <- res[y_vars, x_vars]

# Long format (Var1 = row name, Var2 = column name, value = rho)
df_long <- melt(sub_cor)
# 
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# --- 1) helper: rectangular p-value matrix ---
#' Correlation-test p-values for every (y, x) pair of columns
#'
#' For each pair, the rows where both values are present are passed to
#' `cor.test()` and its p-value is stored. Pairs with fewer than 3 complete
#' observations stay `NA`.
#'
#' @param df Data frame holding every column named in `y_vars` and `x_vars`.
#' @param y_vars Character vector of column names; they label the rows of
#'   the result.
#' @param x_vars Character vector of column names; they label the columns of
#'   the result.
#' @param method Correlation method handed to `cor.test()`.
#' @param adjust `"none"` (default) returns raw p-values; `"BH"` applies
#'   `p.adjust()` across all cells of the matrix at once.
#' @return Numeric matrix, `length(y_vars)` x `length(x_vars)`, with
#'   `dimnames = list(y_vars, x_vars)`.
get_pmat_rect <- function(df, y_vars, x_vars, method = "spearman", adjust = c("none","BH")) {
  adjust <- match.arg(adjust)

  # Fail early and clearly on a misspelled / absent column. Without this,
  # df[[name]] silently yields NULL and the failure surfaces later as an
  # unrelated-looking error.
  missing_vars <- setdiff(c(y_vars, x_vars), names(df))
  if (length(missing_vars) > 0) {
    stop(
      "Columns not found in `df`: ", paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  p <- matrix(NA_real_, nrow = length(y_vars), ncol = length(x_vars),
              dimnames = list(y_vars, x_vars))
  for (i in seq_along(y_vars)) {
    y <- df[[y_vars[i]]]  # invariant across the inner loop
    for (j in seq_along(x_vars)) {
      x <- df[[x_vars[j]]]
      ok <- stats::complete.cases(x, y)
      if (sum(ok) >= 3) {
        # Warnings from cor.test() are deliberately silenced so that a grid of
        # tests does not flood the console; only the p-value is kept.
        p[i, j] <- suppressWarnings(
          stats::cor.test(x[ok], y[ok], method = method)$p.value
        )
      }
    }
  }
  if (adjust != "none") {
    # Adjust over the whole grid; p[] <- keeps the dimensions and dimnames.
    p[] <- stats::p.adjust(as.vector(p), method = adjust)
  }
  p
}

# rectangular correlation submatrix
# Spearman correlations between the median RMSSD epoch summaries and the
# CASS / mCASS measures for patients outside the normal group. Columns are
# picked by name in matrix order; data.frame() (check.names = TRUE) rewrites
# the spaces in the names as dots, which is the spelling y_vars / x_vars use.
cor_mat <- cor(
  data.frame(
    EVA_Patients_5min_RMSSD_joined_excluding_normal[c(
      "median_RMSSD_Time_fast epoch",
      "median_RMSSD_Time_medium epoch",
      "median_RMSSD_Time_slow epoch",
      "median_RMSSD_Magnitude_fast epoch",
      "median_RMSSD_Magnitude_medium epoch",
      "median_RMSSD_Magnitude_slow epoch",
      "median_RMSSD_Time_total_epoch",
      "median_RMSSD_Magnitude_total_epoch",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )]
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)
# Subset the matrix computed just above. (This used to index the older global
# `res`, which left `cor_mat` unused and made the result depend on `res` not
# having been overwritten in between.)
sub_cor <- cor_mat[y_vars, x_vars]

# rectangular p-value matrix
p_sub   <- get_pmat_rect(data.frame(
  `median_RMSSD_Time_fast epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`median_RMSSD_Time_fast epoch`, 
  `median_RMSSD_Time_medium epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`median_RMSSD_Time_medium epoch`, 
  `median_RMSSD_Time_slow epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`median_RMSSD_Time_slow epoch`, 
  `median_RMSSD_Magnitude_fast epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`median_RMSSD_Magnitude_fast epoch`, 
  `median_RMSSD_Magnitude_medium epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`median_RMSSD_Magnitude_medium epoch`, 
  `median_RMSSD_Magnitude_slow epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`median_RMSSD_Magnitude_slow epoch`, 
  `median_RMSSD_Time_total_epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$median_RMSSD_Time_total_epoch,
  `median_RMSSD_Magnitude_total_epoch` = EVA_Patients_5min_RMSSD_joined_excluding_normal$median_RMSSD_Magnitude_total_epoch,
  `cass_baseline VM SBP` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_baseline VM SBP`, 
  `cass_baseline VM DBP` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_baseline VM DBP`, 
  `cass_Valsalva Ratio` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_Valsalva Ratio`, 
  `cass_HRDB Ratio` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_HRDB Ratio`, 
  `cass_adrenergic_index` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_adrenergic_index`, 
  `cass_sudomotor_index` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_sudomotor_index`, 
  `cass_cardiovagal_index` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_cardiovagal_index`, 
  `cass_total` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`cass_total`, 
  `mcass_brsa` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`mcass_brsa`, 
  `mcass_brsv` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`mcass_brsv`, 
  `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`mcass_adrenergic_index`, 
  `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`mcass_sudomotor_index`, 
  `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`mcass_cardiovagal_index`, 
  `mcass_total` = EVA_Patients_5min_RMSSD_joined_excluding_normal$`mcass_total`
  ), y_vars, x_vars, method = "spearman", adjust = "none")

# Reshape both rectangular matrices to long form: one row per (Y, X) cell,
# with the correlation in `r` and the p-value in `p`.
df_cor <- melt(sub_cor, value.name = "r", varnames = c("Y", "X"))
df_p <- melt(p_sub, value.name = "p", varnames = c("Y", "X"))

# Join r and p per cell and flag cells with a non-missing p below alpha
alpha <- 0.05
df_plot <- merge(x = df_cor, y = df_p, by = c("X", "Y"))
df_plot[["sig"]] <- !is.na(df_plot[["p"]]) & df_plot[["p"]] < alpha

# --- 3) Plot A: correlation heatmap with numbers & significance marks ---
# Tiles are filled by r on a fixed [-1, 1] diverging scale and labelled with
# r to two decimals. The significance-star layer is currently disabled.
p_corr <- ggplot(data = df_plot, mapping = aes(x = X, y = Y, fill = r)) +
  geom_tile(colour = "white") +
  geom_text(mapping = aes(label = sprintf("%.2f", r)), size = 3) +
  # add a star for significant cells
  # geom_text(data = subset(df_plot, sig), label = "*", vjust = -0.9, size = 4) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

p_corr

# --- 4) Plot B: p-value heatmap with numbers (0–1 scale) ---
# A two-colour sequential scale is used so that p = 0 is red and p = 1 is
# yellow. scale_fill_gradient2() is a diverging scale whose midpoint defaults
# to 0 with mid = "white"; with p restricted to [0, 1] it only ever showed the
# white -> yellow half and the "red" end was never reached.
# Cells with a missing p-value get an empty label.
p_pval <- ggplot(df_plot, aes(X, Y, fill = p)) +
  geom_tile(color = "white") +
  geom_text(aes(label = ifelse(is.na(p), "", sprintf("%.3f", p))), size = 3) +
  scale_fill_gradient(low = "red", high = "yellow",
                      limits = c(0, 1), name = "p-value") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_pval

# 5.8 New Correlation Matrix 3

# Spearman correlation matrix (pairwise-complete observations) over the
# contraction / motility columns and the CASS / mCASS columns of
# EVA_Patients_5min_general.
# Columns are pulled by name; data.frame(check.names = TRUE) converts names
# containing spaces to dotted names, which is why x_vars below uses e.g.
# "cass_baseline.VM.SBP".
res <- cor(
  data.frame(
    lapply(
      setNames(nm = c(
        "total_contractions_frequency",
        "total_contraction_rate_over10",
        "number_of_contractions_over26.7_per_hour",
        "number_of_total_epochs_frequency",
        "imputed_sum_of_amplitude",
        "motility_index",
        "cass_baseline VM SBP",
        "cass_baseline VM DBP",
        "cass_Valsalva Ratio",
        "cass_HRDB Ratio",
        "cass_adrenergic_index",
        "cass_sudomotor_index",
        "cass_cardiovagal_index",
        "cass_total",
        "mcass_brsa",
        "mcass_brsv",
        "mcass_adrenergic_index",
        "mcass_sudomotor_index",
        "mcass_cardiovagal_index",
        "mcass_total"
      )),
      function(col) EVA_Patients_5min_general[[col]]
    ),
    check.names = TRUE
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Row (y) and column (x) variables of the rectangular heatmap
y_vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)
x_vars <- c(
  "cass_baseline.VM.SBP",
  "cass_baseline.VM.DBP",
  "cass_Valsalva.Ratio",
  "cass_HRDB.Ratio",
  "cass_adrenergic_index",
  "cass_sudomotor_index",
  "cass_cardiovagal_index",
  "cass_total",
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Rectangular block of the full matrix: y_vars in rows, x_vars in columns
sub_cor <- res[y_vars, x_vars]

# Long format of the submatrix (default melt() column names); the quick-look
# heatmap that used it is kept below, disabled.
df_long <- melt(sub_cor)
#
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# --- 1) helper: rectangular p-value matrix ---
#' Rectangular matrix of correlation-test p-values
#'
#' For each (y, x) pair, keeps the rows where both columns are non-missing
#' and, when at least 3 such rows remain, stores the p-value of `cor.test()`.
#' Cells with fewer than 3 complete pairs stay `NA`.
#'
#' @param df Data frame holding every column named in `y_vars` and `x_vars`.
#' @param y_vars Character vector of column names; rows of the result.
#' @param x_vars Character vector of column names; columns of the result.
#' @param method Correlation method forwarded to `cor.test()`.
#' @param adjust `"none"` (default) or `"BH"`; with `"BH"` all cells are
#'   adjusted together through `p.adjust()`.
#' @return Numeric matrix, `length(y_vars)` x `length(x_vars)`, with
#'   `dimnames = list(y_vars, x_vars)`.
# NOTE(review): this helper is defined several times in this script with the
# same body; consider keeping a single definition.
get_pmat_rect <- function(df, y_vars, x_vars, method = "spearman",
                          adjust = c("none", "BH")) {
  adjust <- match.arg(adjust)

  # Fail early with a readable message: a misspelled name (for example one
  # that misses the dots introduced by data.frame(check.names = TRUE)) would
  # otherwise surface as an obscure cor.test() error or as silent NA cells.
  absent <- setdiff(c(y_vars, x_vars), names(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in `df`: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  p <- matrix(NA_real_, nrow = length(y_vars), ncol = length(x_vars),
              dimnames = list(y_vars, x_vars))
  for (i in seq_along(y_vars)) {
    y <- df[[y_vars[i]]]
    for (j in seq_along(x_vars)) {
      x <- df[[x_vars[j]]]
      ok <- stats::complete.cases(x, y)
      if (sum(ok) >= 3) {
        # cor.test() warnings (e.g. about ties with Spearman) are
        # deliberately muted; only the p-value is kept.
        p[i, j] <- suppressWarnings(
          stats::cor.test(x[ok], y[ok], method = method)$p.value
        )
      }
    }
  }
  if (adjust != "none") {
    # Adjust all cells jointly; NA cells stay NA
    p[] <- stats::p.adjust(as.vector(p), method = adjust)
  }
  p
}

# rectangular correlation submatrix
# The input columns are gathered ONCE so that the correlation matrix and the
# p-value matrix below are guaranteed to describe the same data.
# data.frame() is kept at check.names = TRUE on purpose: names containing
# spaces become dotted names (e.g. "cass_HRDB Ratio" -> "cass_HRDB.Ratio"),
# the form used in x_vars.
cor_input <- data.frame(
  lapply(
    setNames(nm = c(
      "total_contractions_frequency",
      "total_contraction_rate_over10",
      "number_of_contractions_over26.7_per_hour",
      "number_of_total_epochs_frequency",
      "imputed_sum_of_amplitude",
      "motility_index",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )),
    function(col) EVA_Patients_5min_general[[col]]
  ),
  check.names = TRUE
)
cor_mat <- cor(cor_input, use = "pairwise.complete.obs", method = "spearman")
# Subset the matrix that was just computed. Reading `res` here would leave
# cor_mat unused and would tie this step to an object built in a separate
# statement.
sub_cor <- cor_mat[y_vars, x_vars]

# rectangular p-value matrix (choose method = "pearson"/"spearman"; adjust = "BH" if you want)
p_sub <- get_pmat_rect(cor_input, y_vars, x_vars,
                       method = "spearman", adjust = "none")

# Reshape both rectangular matrices to long form: one row per (Y, X) cell,
# with the correlation in `r` and the p-value in `p`.
df_cor <- melt(sub_cor, value.name = "r", varnames = c("Y", "X"))
df_p <- melt(p_sub, value.name = "p", varnames = c("Y", "X"))

# Join r and p per cell and flag cells with a non-missing p below alpha
alpha <- 0.05
df_plot <- merge(x = df_cor, y = df_p, by = c("X", "Y"))
df_plot[["sig"]] <- !is.na(df_plot[["p"]]) & df_plot[["p"]] < alpha

# --- 3) Plot A: correlation heatmap with numbers & significance marks ---
# Tiles are filled by r on a fixed [-1, 1] diverging scale and labelled with
# r to two decimals. The significance-star layer is currently disabled.
p_corr <- ggplot(data = df_plot, mapping = aes(x = X, y = Y, fill = r)) +
  geom_tile(colour = "white") +
  geom_text(mapping = aes(label = sprintf("%.2f", r)), size = 3) +
  # add a star for significant cells
  # geom_text(data = subset(df_plot, sig), label = "*", vjust = -0.9, size = 4) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

p_corr

# --- 4) Plot B: p-value heatmap with numbers (0–1 scale) ---
# A two-colour sequential scale is used so that p = 0 is red and p = 1 is
# yellow. scale_fill_gradient2() is a diverging scale whose midpoint defaults
# to 0 with mid = "white"; with p restricted to [0, 1] it only ever showed the
# white -> yellow half and the "red" end was never reached.
# Cells with a missing p-value get an empty label.
p_pval <- ggplot(df_plot, aes(X, Y, fill = p)) +
  geom_tile(color = "white") +
  geom_text(aes(label = ifelse(is.na(p), "", sprintf("%.3f", p))), size = 3) +
  scale_fill_gradient(low = "red", high = "yellow",
                      limits = c(0, 1), name = "p-value") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_pval

# Spearman correlation matrix (pairwise-complete observations) over the
# median RMSSD columns and the CASS / mCASS columns of
# EVA_Patients_5min_RMSSD_joined.
# Columns are pulled by name; data.frame(check.names = TRUE) converts names
# containing spaces to dotted names, which is why y_vars / x_vars below use
# e.g. "median_RMSSD_Time_slow.epoch" and "cass_baseline.VM.SBP".
res <- cor(
  data.frame(
    lapply(
      setNames(nm = c(
        "median_RMSSD_Time_fast epoch",
        "median_RMSSD_Time_medium epoch",
        "median_RMSSD_Time_slow epoch",
        "median_RMSSD_Magnitude_fast epoch",
        "median_RMSSD_Magnitude_medium epoch",
        "median_RMSSD_Magnitude_slow epoch",
        "median_RMSSD_Time_total_epoch",
        "median_RMSSD_Magnitude_total_epoch",
        "cass_baseline VM SBP",
        "cass_baseline VM DBP",
        "cass_Valsalva Ratio",
        "cass_HRDB Ratio",
        "cass_adrenergic_index",
        "cass_sudomotor_index",
        "cass_cardiovagal_index",
        "cass_total",
        "mcass_brsa",
        "mcass_brsv",
        "mcass_adrenergic_index",
        "mcass_sudomotor_index",
        "mcass_cardiovagal_index",
        "mcass_total"
      )),
      function(col) EVA_Patients_5min_RMSSD_joined[[col]]
    ),
    check.names = TRUE
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Row (y) and column (x) variables of the rectangular heatmap
# NOTE(review): y_vars combines one Magnitude column with three Time columns;
# confirm this selection is intended.
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
x_vars <- c(
  "cass_baseline.VM.SBP",
  "cass_baseline.VM.DBP",
  "cass_Valsalva.Ratio",
  "cass_HRDB.Ratio",
  "cass_adrenergic_index",
  "cass_sudomotor_index",
  "cass_cardiovagal_index",
  "cass_total",
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Rectangular block of the full matrix: y_vars in rows, x_vars in columns
sub_cor <- res[y_vars, x_vars]
#
# # Convert to long format and plot with ggplot2
# df_long <- melt(sub_cor)
#
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# --- 1) helper: rectangular p-value matrix ---
#' Rectangular matrix of correlation-test p-values
#'
#' For each (y, x) pair, keeps the rows where both columns are non-missing
#' and, when at least 3 such rows remain, stores the p-value of `cor.test()`.
#' Cells with fewer than 3 complete pairs stay `NA`.
#'
#' @param df Data frame holding every column named in `y_vars` and `x_vars`.
#' @param y_vars Character vector of column names; rows of the result.
#' @param x_vars Character vector of column names; columns of the result.
#' @param method Correlation method forwarded to `cor.test()`.
#' @param adjust `"none"` (default) or `"BH"`; with `"BH"` all cells are
#'   adjusted together through `p.adjust()`.
#' @return Numeric matrix, `length(y_vars)` x `length(x_vars)`, with
#'   `dimnames = list(y_vars, x_vars)`.
# NOTE(review): this helper is defined several times in this script with the
# same body; consider keeping a single definition.
get_pmat_rect <- function(df, y_vars, x_vars, method = "spearman",
                          adjust = c("none", "BH")) {
  adjust <- match.arg(adjust)

  # Fail early with a readable message: a misspelled name (for example one
  # that misses the dots introduced by data.frame(check.names = TRUE)) would
  # otherwise surface as an obscure cor.test() error or as silent NA cells.
  absent <- setdiff(c(y_vars, x_vars), names(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in `df`: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  p <- matrix(NA_real_, nrow = length(y_vars), ncol = length(x_vars),
              dimnames = list(y_vars, x_vars))
  for (i in seq_along(y_vars)) {
    y <- df[[y_vars[i]]]
    for (j in seq_along(x_vars)) {
      x <- df[[x_vars[j]]]
      ok <- stats::complete.cases(x, y)
      if (sum(ok) >= 3) {
        # cor.test() warnings (e.g. about ties with Spearman) are
        # deliberately muted; only the p-value is kept.
        p[i, j] <- suppressWarnings(
          stats::cor.test(x[ok], y[ok], method = method)$p.value
        )
      }
    }
  }
  if (adjust != "none") {
    # Adjust all cells jointly; NA cells stay NA
    p[] <- stats::p.adjust(as.vector(p), method = adjust)
  }
  p
}

# rectangular correlation submatrix
# The input columns are gathered ONCE so that the correlation matrix and the
# p-value matrix below are guaranteed to describe the same data.
# data.frame() is kept at check.names = TRUE on purpose: names containing
# spaces become dotted names (e.g. "cass_HRDB Ratio" -> "cass_HRDB.Ratio"),
# the form used in y_vars / x_vars.
cor_input <- data.frame(
  lapply(
    setNames(nm = c(
      "median_RMSSD_Time_fast epoch",
      "median_RMSSD_Time_medium epoch",
      "median_RMSSD_Time_slow epoch",
      "median_RMSSD_Magnitude_fast epoch",
      "median_RMSSD_Magnitude_medium epoch",
      "median_RMSSD_Magnitude_slow epoch",
      "median_RMSSD_Time_total_epoch",
      "median_RMSSD_Magnitude_total_epoch",
      "cass_baseline VM SBP",
      "cass_baseline VM DBP",
      "cass_Valsalva Ratio",
      "cass_HRDB Ratio",
      "cass_adrenergic_index",
      "cass_sudomotor_index",
      "cass_cardiovagal_index",
      "cass_total",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )),
    function(col) EVA_Patients_5min_RMSSD_joined[[col]]
  ),
  check.names = TRUE
)
cor_mat <- cor(cor_input, use = "pairwise.complete.obs", method = "spearman")
# Subset the matrix that was just computed. Reading `res` here would leave
# cor_mat unused and would tie this step to an object built in a separate
# statement.
sub_cor <- cor_mat[y_vars, x_vars]

# rectangular p-value matrix
p_sub <- get_pmat_rect(cor_input, y_vars, x_vars,
                       method = "spearman", adjust = "none")

# Reshape both rectangular matrices to long form: one row per (Y, X) cell,
# with the correlation in `r` and the p-value in `p`.
df_cor <- melt(sub_cor, value.name = "r", varnames = c("Y", "X"))
df_p <- melt(p_sub, value.name = "p", varnames = c("Y", "X"))

# Join r and p per cell and flag cells with a non-missing p below alpha
alpha <- 0.05
df_plot <- merge(x = df_cor, y = df_p, by = c("X", "Y"))
df_plot[["sig"]] <- !is.na(df_plot[["p"]]) & df_plot[["p"]] < alpha

# --- 3) Plot A: correlation heatmap with numbers & significance marks ---
# Tiles are filled by r on a fixed [-1, 1] diverging scale and labelled with
# r to two decimals. The significance-star layer is currently disabled.
p_corr <- ggplot(data = df_plot, mapping = aes(x = X, y = Y, fill = r)) +
  geom_tile(colour = "white") +
  geom_text(mapping = aes(label = sprintf("%.2f", r)), size = 3) +
  # add a star for significant cells
  # geom_text(data = subset(df_plot, sig), label = "*", vjust = -0.9, size = 4) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

p_corr

# --- 4) Plot B: p-value heatmap with numbers (0–1 scale) ---
# A two-colour sequential scale is used so that p = 0 is red and p = 1 is
# yellow. scale_fill_gradient2() is a diverging scale whose midpoint defaults
# to 0 with mid = "white"; with p restricted to [0, 1] it only ever showed the
# white -> yellow half and the "red" end was never reached.
# Cells with a missing p-value get an empty label.
p_pval <- ggplot(df_plot, aes(X, Y, fill = p)) +
  geom_tile(color = "white") +
  geom_text(aes(label = ifelse(is.na(p), "", sprintf("%.3f", p))), size = 3) +
  scale_fill_gradient(low = "red", high = "yellow",
                      limits = c(0, 1), name = "p-value") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_pval

# Spearman correlation matrix (pairwise-complete observations) over the
# contraction / motility columns and the mCASS columns of
# EVA_Patients_5min_general.
# Columns are pulled by name and assembled with data.frame(), whose
# check.names = TRUE default is spelled out here.
res_new <- cor(
  data.frame(
    lapply(
      setNames(nm = c(
        "total_contractions_frequency",
        "total_contraction_rate_over10",
        "number_of_contractions_over26.7_per_hour",
        "number_of_total_epochs_frequency",
        "imputed_sum_of_amplitude",
        "motility_index",
        "mcass_brsa",
        "mcass_brsv",
        "mcass_adrenergic_index",
        "mcass_sudomotor_index",
        "mcass_cardiovagal_index",
        "mcass_total"
      )),
      function(col) EVA_Patients_5min_general[[col]]
    ),
    check.names = TRUE
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Row (y) and column (x) variables of the rectangular heatmap
y_vars <- c(
  "total_contractions_frequency",
  "total_contraction_rate_over10",
  "number_of_contractions_over26.7_per_hour",
  "number_of_total_epochs_frequency",
  "imputed_sum_of_amplitude",
  "motility_index"
)
x_vars_new <- c(
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Rectangular block of the full matrix: y_vars in rows, x_vars_new in columns
sub_cor <- res_new[y_vars, x_vars_new]

# Long format of the submatrix (default melt() column names); the quick-look
# heatmap that used it is kept below, disabled.
df_long <- melt(sub_cor)
#
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# --- 1) helper: rectangular p-value matrix ---
#' Rectangular matrix of correlation-test p-values
#'
#' For each (y, x) pair, keeps the rows where both columns are non-missing
#' and, when at least 3 such rows remain, stores the p-value of `cor.test()`.
#' Cells with fewer than 3 complete pairs stay `NA`.
#'
#' @param df Data frame holding every column named in `y_vars` and `x_vars`.
#' @param y_vars Character vector of column names; rows of the result.
#' @param x_vars Character vector of column names; columns of the result.
#' @param method Correlation method forwarded to `cor.test()`.
#' @param adjust `"none"` (default) or `"BH"`; with `"BH"` all cells are
#'   adjusted together through `p.adjust()`.
#' @return Numeric matrix, `length(y_vars)` x `length(x_vars)`, with
#'   `dimnames = list(y_vars, x_vars)`.
# NOTE(review): this helper is defined several times in this script with the
# same body; consider keeping a single definition.
get_pmat_rect <- function(df, y_vars, x_vars, method = "spearman",
                          adjust = c("none", "BH")) {
  adjust <- match.arg(adjust)

  # Fail early with a readable message: a misspelled name (for example one
  # that misses the dots introduced by data.frame(check.names = TRUE)) would
  # otherwise surface as an obscure cor.test() error or as silent NA cells.
  absent <- setdiff(c(y_vars, x_vars), names(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in `df`: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  p <- matrix(NA_real_, nrow = length(y_vars), ncol = length(x_vars),
              dimnames = list(y_vars, x_vars))
  for (i in seq_along(y_vars)) {
    y <- df[[y_vars[i]]]
    for (j in seq_along(x_vars)) {
      x <- df[[x_vars[j]]]
      ok <- stats::complete.cases(x, y)
      if (sum(ok) >= 3) {
        # cor.test() warnings (e.g. about ties with Spearman) are
        # deliberately muted; only the p-value is kept.
        p[i, j] <- suppressWarnings(
          stats::cor.test(x[ok], y[ok], method = method)$p.value
        )
      }
    }
  }
  if (adjust != "none") {
    # Adjust all cells jointly; NA cells stay NA
    p[] <- stats::p.adjust(as.vector(p), method = adjust)
  }
  p
}

# rectangular correlation submatrix
# The input columns are gathered ONCE so that the correlation matrix and the
# p-value matrix below are guaranteed to describe the same data.
# data.frame() is kept at its default check.names = TRUE (spelled out here).
cor_input <- data.frame(
  lapply(
    setNames(nm = c(
      "total_contractions_frequency",
      "total_contraction_rate_over10",
      "number_of_contractions_over26.7_per_hour",
      "number_of_total_epochs_frequency",
      "imputed_sum_of_amplitude",
      "motility_index",
      "mcass_brsa",
      "mcass_brsv",
      "mcass_adrenergic_index",
      "mcass_sudomotor_index",
      "mcass_cardiovagal_index",
      "mcass_total"
    )),
    function(col) EVA_Patients_5min_general[[col]]
  ),
  check.names = TRUE
)
cor_mat <- cor(cor_input, use = "pairwise.complete.obs", method = "spearman")
# Subset the matrix that was just computed. Reading `res_new` here would leave
# cor_mat unused and would tie this step to an object built in a separate
# statement.
sub_cor <- cor_mat[y_vars, x_vars_new]

# rectangular p-value matrix (choose method = "pearson"/"spearman"; adjust = "BH" if you want)
p_sub <- get_pmat_rect(cor_input, y_vars, x_vars_new,
                       method = "spearman", adjust = "none")

# Reshape both rectangular matrices to long form: one row per (Y, X) cell,
# with the correlation in `r` and the p-value in `p`.
df_cor <- melt(sub_cor, value.name = "r", varnames = c("Y", "X"))
df_p <- melt(p_sub, value.name = "p", varnames = c("Y", "X"))

# Join r and p per cell and flag cells with a non-missing p below alpha
alpha <- 0.05
df_plot <- merge(x = df_cor, y = df_p, by = c("X", "Y"))
df_plot[["sig"]] <- !is.na(df_plot[["p"]]) & df_plot[["p"]] < alpha

# --- 3) Plot A: correlation heatmap with numbers & significance marks ---
# Tiles are filled by r on a fixed [-1, 1] diverging scale and labelled with
# r to two decimals. The significance-star layer is currently disabled.
p_corr <- ggplot(data = df_plot, mapping = aes(x = X, y = Y, fill = r)) +
  geom_tile(colour = "white") +
  geom_text(mapping = aes(label = sprintf("%.2f", r)), size = 3) +
  # add a star for significant cells
  # geom_text(data = subset(df_plot, sig), label = "*", vjust = -0.9, size = 4) +
  scale_fill_gradient2(
    name = "Correlation",
    low = "red", mid = "white", high = "blue",
    midpoint = 0, limits = c(-1, 1)
  ) +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  )

p_corr

# --- 4) Plot B: p-value heatmap with numbers (0–1 scale) ---
# A two-colour sequential scale is used so that p = 0 is red and p = 1 is
# yellow. scale_fill_gradient2() is a diverging scale whose midpoint defaults
# to 0 with mid = "white"; with p restricted to [0, 1] it only ever showed the
# white -> yellow half and the "red" end was never reached.
# Cells with a missing p-value get an empty label.
p_pval <- ggplot(df_plot, aes(X, Y, fill = p)) +
  geom_tile(color = "white") +
  geom_text(aes(label = ifelse(is.na(p), "", sprintf("%.3f", p))), size = 3) +
  scale_fill_gradient(low = "red", high = "yellow",
                      limits = c(0, 1), name = "p-value") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_pval

# Spearman correlation matrix (pairwise-complete observations) over the
# median RMSSD columns and the mCASS columns of EVA_Patients_5min_RMSSD_joined.
# Columns are pulled by name; data.frame(check.names = TRUE) converts names
# containing spaces to dotted names, which is why y_vars below uses e.g.
# "median_RMSSD_Time_slow.epoch".
res_new <- cor(
  data.frame(
    lapply(
      setNames(nm = c(
        "median_RMSSD_Time_fast epoch",
        "median_RMSSD_Time_medium epoch",
        "median_RMSSD_Time_slow epoch",
        "median_RMSSD_Magnitude_fast epoch",
        "median_RMSSD_Magnitude_medium epoch",
        "median_RMSSD_Magnitude_slow epoch",
        "median_RMSSD_Time_total_epoch",
        "median_RMSSD_Magnitude_total_epoch",
        "mcass_brsa",
        "mcass_brsv",
        "mcass_adrenergic_index",
        "mcass_sudomotor_index",
        "mcass_cardiovagal_index",
        "mcass_total"
      )),
      function(col) EVA_Patients_5min_RMSSD_joined[[col]]
    ),
    check.names = TRUE
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Row (y) and column (x) variables of the rectangular heatmap
# NOTE(review): y_vars combines one Magnitude column with three Time columns;
# confirm this selection is intended.
y_vars <- c(
  "median_RMSSD_Magnitude_total_epoch",
  "median_RMSSD_Time_slow.epoch",
  "median_RMSSD_Time_medium.epoch",
  "median_RMSSD_Time_fast.epoch"
)
x_vars_new <- c(
  "mcass_brsa",
  "mcass_brsv",
  "mcass_adrenergic_index",
  "mcass_sudomotor_index",
  "mcass_cardiovagal_index",
  "mcass_total"
)

# Rectangular block of the full matrix: y_vars in rows, x_vars_new in columns
sub_cor <- res_new[y_vars, x_vars_new]
#
# # Convert to long format and plot with ggplot2
# df_long <- melt(sub_cor)
#
# ggplot(df_long, aes(Var2, Var1, fill = value)) +
#   geom_tile(color = "white") +
#   geom_text(aes(label = round(value, 2)), size = 3) +
#   scale_fill_gradient2(
#     low = "red", mid = "white", high = "blue",
#     midpoint = 0, limit = c(-1, 1), name = "Correlation"
#   ) +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
#   coord_fixed()
# --- 1) helper: rectangular p-value matrix ---
#' Rectangular matrix of correlation-test p-values
#'
#' For each (y, x) pair, keeps the rows where both columns are non-missing
#' and, when at least 3 such rows remain, stores the p-value of `cor.test()`.
#' Cells with fewer than 3 complete pairs stay `NA`.
#'
#' @param df Data frame holding every column named in `y_vars` and `x_vars`.
#' @param y_vars Character vector of column names; rows of the result.
#' @param x_vars Character vector of column names; columns of the result.
#' @param method Correlation method forwarded to `cor.test()`.
#' @param adjust `"none"` (default) or `"BH"`; with `"BH"` all cells are
#'   adjusted together through `p.adjust()`.
#' @return Numeric matrix, `length(y_vars)` x `length(x_vars)`, with
#'   `dimnames = list(y_vars, x_vars)`.
# NOTE(review): this helper is defined several times in this script with the
# same body; consider keeping a single definition.
get_pmat_rect <- function(df, y_vars, x_vars, method = "spearman",
                          adjust = c("none", "BH")) {
  adjust <- match.arg(adjust)

  # Fail early with a readable message: a misspelled name (for example one
  # that misses the dots introduced by data.frame(check.names = TRUE)) would
  # otherwise surface as an obscure cor.test() error or as silent NA cells.
  absent <- setdiff(c(y_vars, x_vars), names(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in `df`: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  p <- matrix(NA_real_, nrow = length(y_vars), ncol = length(x_vars),
              dimnames = list(y_vars, x_vars))
  for (i in seq_along(y_vars)) {
    y <- df[[y_vars[i]]]
    for (j in seq_along(x_vars)) {
      x <- df[[x_vars[j]]]
      ok <- stats::complete.cases(x, y)
      if (sum(ok) >= 3) {
        # cor.test() warnings (e.g. about ties with Spearman) are
        # deliberately muted; only the p-value is kept.
        p[i, j] <- suppressWarnings(
          stats::cor.test(x[ok], y[ok], method = method)$p.value
        )
      }
    }
  }
  if (adjust != "none") {
    # Adjust all cells jointly; NA cells stay NA
    p[] <- stats::p.adjust(as.vector(p), method = adjust)
  }
  p
}

# rectangular correlation submatrix
# Spearman correlations (pairwise-complete observations) over the median RMSSD
# and mCASS columns of EVA_Patients_5min_RMSSD_joined. Columns are pulled by
# name; data.frame(check.names = TRUE) converts names containing spaces to
# dotted names.
cor_mat <- cor(
  data.frame(
    lapply(
      setNames(nm = c(
        "median_RMSSD_Time_fast epoch",
        "median_RMSSD_Time_medium epoch",
        "median_RMSSD_Time_slow epoch",
        "median_RMSSD_Magnitude_fast epoch",
        "median_RMSSD_Magnitude_medium epoch",
        "median_RMSSD_Magnitude_slow epoch",
        "median_RMSSD_Time_total_epoch",
        "median_RMSSD_Magnitude_total_epoch",
        "mcass_brsa",
        "mcass_brsv",
        "mcass_adrenergic_index",
        "mcass_sudomotor_index",
        "mcass_cardiovagal_index",
        "mcass_total"
      )),
      function(col) EVA_Patients_5min_RMSSD_joined[[col]]
    ),
    check.names = TRUE
  ),
  use = "pairwise.complete.obs",
  method = "spearman"
)
# NOTE(review): the submatrix is taken from res_new (not cor_mat) and stored as
# sub_cor_new, whereas the other sections of this script assign sub_cor;
# confirm which object the downstream plotting code reads.
sub_cor_new <- res_new[y_vars, x_vars_new]

# rectangular p-value matrix
p_sub   <- get_pmat_rect(data.frame(
  `median_RMSSD_Time_fast epoch` = EVA_Patients_5min_RMSSD_joined$`median_RMSSD_Time_fast epoch`, 
  `median_RMSSD_Time_medium epoch` = EVA_Patients_5min_RMSSD_joined$`median_RMSSD_Time_medium epoch`, 
  `median_RMSSD_Time_slow epoch` = EVA_Patients_5min_RMSSD_joined$`median_RMSSD_Time_slow epoch`, 
  `median_RMSSD_Magnitude_fast epoch` = EVA_Patients_5min_RMSSD_joined$`median_RMSSD_Magnitude_fast epoch`, 
  `median_RMSSD_Magnitude_medium epoch` = EVA_Patients_5min_RMSSD_joined$`median_RMSSD_Magnitude_medium epoch`, 
  `median_RMSSD_Magnitude_slow epoch` = EVA_Patients_5min_RMSSD_joined$`median_RMSSD_Magnitude_slow epoch`, 
  `median_RMSSD_Time_total_epoch` = EVA_Patients_5min_RMSSD_joined$median_RMSSD_Time_total_epoch,
  `median_RMSSD_Magnitude_total_epoch` = EVA_Patients_5min_RMSSD_joined$median_RMSSD_Magnitude_total_epoch,
  `mcass_brsa` = EVA_Patients_5min_RMSSD_joined$`mcass_brsa`, 
  `mcass_brsv` = EVA_Patients_5min_RMSSD_joined$`mcass_brsv`, 
  `mcass_adrenergic_index` = EVA_Patients_5min_RMSSD_joined$`mcass_adrenergic_index`, 
  `mcass_sudomotor_index` = EVA_Patients_5min_RMSSD_joined$`mcass_sudomotor_index`, 
  `mcass_cardiovagal_index` = EVA_Patients_5min_RMSSD_joined$`mcass_cardiovagal_index`, 
  `mcass_total` = EVA_Patients_5min_RMSSD_joined$`mcass_total`
  ), y_vars, x_vars_new, method = "spearman", adjust = "none")

# long data for plotting: one row per (Y, X) cell
df_cor <- melt(sub_cor, varnames = c("Y", "X"), value.name = "r")
df_p <- melt(p_sub, varnames = c("Y", "X"), value.name = "p")

# significance threshold (e.g., alpha = 0.05)
alpha <- 0.05

# join r and p on the cell coordinates, then flag cells whose p-value is
# present and below alpha (a missing p is never flagged)
df_plot <- merge(x = df_cor, y = df_p, by = c("X", "Y"))
df_plot[["sig"]] <- !(is.na(df_plot[["p"]]) | df_plot[["p"]] >= alpha)

# --- 3) Plot A: correlation heatmap with numbers & significance marks ---
# One tile per (X, Y) cell, filled by r and labelled with r to two decimals.
p_corr <- ggplot(data = df_plot, mapping = aes(x = X, y = Y, fill = r)) +
  geom_tile(color = "white") +
  geom_text(
    mapping = aes(label = sprintf("%.2f", r)),
    size = 3
  ) +
  # add a star for significant cells
  # geom_text(data = subset(df_plot, sig), label = "*", vjust = -0.9, size = 4) +
  # diverging scale centred on 0 and fixed to the full correlation range
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "blue",
    midpoint = 0,
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    panel.grid = element_blank()
  )

p_corr

# --- 4) Plot B: p-value heatmap with numbers (0–1 scale) ---
# One tile per (X, Y) cell, filled by the p-value and labelled with it to
# three decimals (blank when the p-value is missing).
p_pval <- ggplot(df_plot, aes(X, Y, fill = p)) +
  geom_tile(color = "white") +
  geom_text(aes(label = ifelse(is.na(p), "", sprintf("%.3f", p))), size = 3) +
  # p-values are one-sided on [0, 1], so use the sequential two-colour scale:
  # red at p = 0 through to yellow at p = 1. The diverging
  # scale_fill_gradient2() would centre a white midpoint at 0 and never
  # show the "low" colour.
  scale_fill_gradient(low = "red", high = "yellow",
                      limits = c(0, 1), name = "p-value") +
  coord_fixed() +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        panel.grid = element_blank())

p_pval

# write.csv(EVA_Controls_5min, "Final_EVA_Controls_5min.csv")
# write.csv(EVA_Patients_5min, "Final_EVA_Patients_5min.csv")
# write.csv(Control_Patients_5min, "Final_Control_Patients_5min.csv")
# write.csv(EVA_Controls_5min_RMSSD_joined, "Final_EVA_Controls_5min_joined.csv")
# write.csv(EVA_Patients_5min_RMSSD_joined, "Final_EVA_Patients_5min_RMSSD_joined.csv")
# write.csv(Control_Patients_5min_RMSSD, "Final_Control_Patients_5min_RMSSD.csv")

6 Modification Analysis

Question: Does the relationship between “number_of_contractions_over26.7_per_hour” and “mcass_brsa” vary depending on the value of “mcass_total” and/or “mcass_cardiovagal_index”?

6.1 mcass_total

Because mcass_total and mcass_brsa are correlated with each other, there is a multicollinearity issue: the two predictors are not independent, so the independence assumption is violated.

6.2 mcass_cardiovagal_index

6.2.1 Poisson Regression

# Poisson fit used to check for overdispersion; exposure time enters as a log offset
model <- glm(
  total_number_of_contractions_over26.7 ~ mcass_brsa,
  offset = log(total_no_missing_time),
  family = poisson(link = "log"),
  data = EVA_Patients_5min_general_excluding_normal
)
# Pearson dispersion: sum of squared Pearson residuals over residual degrees of freedom
sum(residuals(model, type = "pearson")^2) / df.residual(model)
## [1] 64.4671

Since Pearson Dispersion is much larger than 1, we should use Negative Binomial, instead of Poisson Regression.

6.2.2 Negative Binomial

6.2.2.1 Unadjusted Model

# MASS provides glm.nb().
# NOTE(review): attaching MASS after dplyr masks dplyr::select() — confirm no
# later code relies on an unqualified select().
library(MASS)
# Unadjusted negative-binomial model: contractions over 26.7 on mcass_brsa,
# with log exposure time as offset
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ mcass_brsa +
      offset(log(total_no_missing_time)),
    data = EVA_Patients_5min_general_excluding_normal
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ mcass_brsa + 
##     offset(log(total_no_missing_time)), data = EVA_Patients_5min_general_excluding_normal, 
##     init.theta = 2.27066766, link = log)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.38710    0.22774   19.26  < 2e-16 ***
## mcass_brsa  -0.04152    0.01462   -2.84  0.00451 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.2707) family taken to be 1)
## 
##     Null deviance: 52.365  on 39  degrees of freedom
## Residual deviance: 42.968  on 38  degrees of freedom
## AIC: 474.64
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.271 
##           Std. Err.:  0.488 
## 
##  2 x log-likelihood:  -468.636
# summary(glm.nb(total_number_of_contractions_over26.7 ~ mcass_cardiovagal_index + offset(log(total_no_missing_time)),  data = EVA_Patients_5min_general_excluding_normal))

The incidence rate ratio (IRR) is 0.9593321

For each 1-unit increase in mcass_brsa, the expected rate of total_number_of_contractions_over26.7 per hour is multiplied by ~0.9593321, indicating a 4.07% decrease in the rate.

For example, if the rate is 10 contractions over 26.7 per hour at mcass_brsa = x1, then at mcass_brsa = x1+1 the rate becomes about 9.6 contractions over 26.7 per hour.

6.2.2.2 Confounding model

# Negative-binomial model of mcass_brsa adjusted for mcass_cardiovagal_index
# (main effects only), with log exposure time as offset
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ mcass_brsa + mcass_cardiovagal_index +
      offset(log(total_no_missing_time)),
    data = EVA_Patients_5min_general_excluding_normal
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ mcass_brsa + 
##     mcass_cardiovagal_index + offset(log(total_no_missing_time)), 
##     data = EVA_Patients_5min_general_excluding_normal, init.theta = 2.675214968, 
##     link = log)
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              4.12724    0.23302  17.712  < 2e-16 ***
## mcass_brsa              -0.04440    0.01350  -3.288  0.00101 ** 
## mcass_cardiovagal_index  0.09310    0.03165   2.942  0.00326 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.6752) family taken to be 1)
## 
##     Null deviance: 61.399  on 39  degrees of freedom
## Residual deviance: 42.590  on 37  degrees of freedom
## AIC: 469.46
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.675 
##           Std. Err.:  0.583 
## 
##  2 x log-likelihood:  -461.459
# Relative change in the mcass_brsa IRR after adjusting for
# mcass_cardiovagal_index: (IRR_adjusted - IRR_crude) / IRR_crude.
# The coefficients are copied from the two model summaries printed above.
local({
  irr_adjusted <- exp(-0.04440)
  irr_crude <- exp(-0.04152)
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] -0.002875857

The percent change in the IRR of mcass_brsa (about 0.29%) is much less than 10%, so we conclude that there is no material confounding by mcass_cardiovagal_index of the effect of mcass_brsa on the rate of total_number_of_contractions_over26.7 per hour.

6.2.2.3 Effect Modification

If mcass_brsa’s effect on total_number_of_contractions_over26.7 per hour varies with mcass_cardiovagal_index, then there is effect modification.

# Effect-modification model: mcass_brsa * mcass_cardiovagal_index expands to
# both main effects plus their interaction; log exposure time is the offset
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ mcass_brsa * mcass_cardiovagal_index +
      offset(log(total_no_missing_time)),
    data = EVA_Patients_5min_general_excluding_normal
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ mcass_brsa * 
##     mcass_cardiovagal_index + offset(log(total_no_missing_time)), 
##     data = EVA_Patients_5min_general_excluding_normal, init.theta = 2.85876824, 
##     link = log)
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         4.523494   0.324119  13.956  < 2e-16 ***
## mcass_brsa                         -0.074773   0.021403  -3.494 0.000477 ***
## mcass_cardiovagal_index            -0.001703   0.057668  -0.030 0.976436    
## mcass_brsa:mcass_cardiovagal_index  0.007376   0.003682   2.003 0.045169 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.8588) family taken to be 1)
## 
##     Null deviance: 65.471  on 39  degrees of freedom
## Residual deviance: 42.415  on 36  degrees of freedom
## AIC: 468.56
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.859 
##           Std. Err.:  0.626 
## 
##  2 x log-likelihood:  -458.560
# Percent change in the rate per 1-unit increase in mcass_brsa when
# mcass_cardiovagal_index = 1. Uses the full-precision interaction coefficient
# (0.007376) from the model summary above rather than the rounded 0.007.
(exp(-0.074773 + 0.007376 * 1) - 1) * 100
## [1] -6.5176 (approximate, worked out by hand; re-knit for the exact printed value)

Each 1-unit increase in mcass_cardiovagal_index multiplies the per-unit incidence rate ratio (IRR) of mcass_brsa by exp(0.007376) ≈ 1.0074, i.e., the IRR (the multiplicative change in the rate of contractions over 26.7 per hour when mcass_brsa increases by 1) becomes about 0.74% larger.

Concrete example: The IRR for a +1 unit increase in mcass_brsa at a given mcass_cardiovagal_index is IRR = exp(-0.074773 + 0.007376*mcass_cardiovagal_index).

When mcass_cardiovagal_index = 0, increasing 1-unit for X reduces the rate of number of contractions over 26.7 per hour by 1 - exp(-0.074773 + 0.007376*0) = 7.2%.

When mcass_cardiovagal_index = 1, increasing 1-unit for X reduces the rate of number of contractions over 26.7 per hour by 1 - exp(-0.074773 + 0.007376*1) = 6.52%.

When mcass_cardiovagal_index = 10, a 1-unit increase in X (mcass_brsa) barely changes the rate of number of contractions over 26.7 per hour: 1 - exp(-0.074773 + 0.007376*10) ≈ 0.1%.

The modification effect is quite small.

7 Whether the key GI variables are influenced by the sex and age differences between the groups?

Question: Is the observed difference in the key GI variables between Jack’s data and EVA patient driven by sex/age imbalance, or does it persist after accounting for them?

In paper https://www.jacc.org/doi/10.1016/j.jacc.2016.10.060, the authors compared conventional covariate adjustment with several propensity score (PS) methods. Propensity score methods were not superior to the traditional covariate adjustment.

In line with prior methodological work (Elze et al., JACC 2017), which showed that propensity score methods were not superior to traditional covariate adjustment in observational analyses, it is believed that covariate adjustment in our regression model is sufficient.

7.1 Confounder Check

A variable is a confounder if it distorts the true relationship between the exposure and outcome. That distortion is seen in how much the estimate (e.g., OR, β, HR) changes after adjustment — not whether the p-value crosses 0.05. Confounding is about bias in the effect estimate, not about statistical significance.

The percent change in the main effect (before vs. after adjustment) directly reflects how much the covariate alters your point estimate. It measures bias in the association, which is the essence of confounding.

If the estimate changes little (<10%), the variable doesn’t distort much.

If it changes meaningfully (≥10%), the variable materially biases your exposure–outcome relationship.

The percent change is calculated as \(\text{Percent Change} = \frac{|\text{Estimate}_{\text{adjusted}} - \text{Estimate}_{\text{crude}}|}{|\text{Estimate}_{\text{crude}}|} \times 100\%\).

When you look at whether your predictor remains statistically significant before and after adjustment, you’re assessing the stability and precision of its association after accounting for other variables — not confounding per se, but robustness of the effect.

Percent change in estimate - Whether another variable biases or distorts your predictor’s effect (confounding).

Change in p-value or significance - Whether your predictor’s effect remains statistically detectable after accounting for covariates (robustness / precision).

7.1.1 total_contractions_frequency - Negative Binomial - No confounding.

# Poisson fit used to check for overdispersion; exposure time enters as a log offset
model <- glm(
  total_number_of_contractions ~ source,
  offset = log(total_no_missing_time),
  family = poisson(link = "log"),
  data = all_data
)
# Pearson dispersion: sum of squared Pearson residuals over residual degrees of freedom
sum(residuals(model, type = "pearson")^2) / df.residual(model)
## [1] 161.6025

Since Pearson Dispersion is much larger than 1, we should use Negative Binomial, instead of Poisson Regression.

# Crude negative-binomial model: total contractions on source,
# with log exposure time as offset
summary(
  glm.nb(
    total_number_of_contractions ~ source +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions ~ source + offset(log(total_no_missing_time)), 
##     data = all_data, init.theta = 5.73464158, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        5.74565    0.04973 115.541  < 2e-16 ***
## sourceJack's Data -0.32165    0.06532  -4.924 8.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(5.7346) family taken to be 1)
## 
##     Null deviance: 198.45  on 168  degrees of freedom
## Residual deviance: 173.90  on 167  degrees of freedom
## AIC: 2445
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  5.735 
##           Std. Err.:  0.612 
## 
##  2 x log-likelihood:  -2439.025
# Same model, adjusted for Sex
summary(
  glm.nb(
    total_number_of_contractions ~ source + Sex +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions ~ source + Sex + 
##     offset(log(total_no_missing_time)), data = all_data, init.theta = 5.741597048, 
##     link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        5.76983    0.07105  81.204  < 2e-16 ***
## sourceJack's Data -0.33010    0.06723  -4.910 9.11e-07 ***
## SexMale           -0.03182    0.06802  -0.468     0.64    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(5.7416) family taken to be 1)
## 
##     Null deviance: 198.69  on 168  degrees of freedom
## Residual deviance: 173.90  on 166  degrees of freedom
## AIC: 2446.8
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  5.742 
##           Std. Err.:  0.612 
## 
##  2 x log-likelihood:  -2438.809
# Relative change in the source IRR after adding Sex:
# (IRR_adjusted - IRR_crude) / IRR_crude. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    total_number_of_contractions ~ source + Sex + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    total_number_of_contractions ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] -0.008416814
# Same model, adjusted for Age
summary(
  glm.nb(
    total_number_of_contractions ~ source + Age +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions ~ source + Age + 
##     offset(log(total_no_missing_time)), data = all_data, init.theta = 5.910060164, 
##     link = log)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        5.595557   0.117673  47.552  < 2e-16 ***
## sourceJack's Data -0.296837   0.066202  -4.484 7.33e-06 ***
## Age                0.002845   0.002019   1.409    0.159    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(5.9101) family taken to be 1)
## 
##     Null deviance: 198.82  on 167  degrees of freedom
## Residual deviance: 172.71  on 165  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 2429.6
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  5.910 
##           Std. Err.:  0.633 
## 
##  2 x log-likelihood:  -2421.605
# Relative change in the source IRR after adding Age:
# (IRR_adjusted - IRR_crude) / IRR_crude. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    total_number_of_contractions ~ source + Age + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    total_number_of_contractions ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] 0.02512134
# Same model, adjusted for both Sex and Age
summary(
  glm.nb(
    total_number_of_contractions ~ source + Sex + Age +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions ~ source + Sex + 
##     Age + offset(log(total_no_missing_time)), data = all_data, 
##     init.theta = 5.912453014, link = log)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        5.610915   0.130169  43.105   <2e-16 ***
## sourceJack's Data -0.301879   0.068342  -4.417    1e-05 ***
## SexMale           -0.018154   0.067342  -0.270    0.787    
## Age                0.002815   0.002021   1.393    0.164    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(5.9125) family taken to be 1)
## 
##     Null deviance: 198.90  on 167  degrees of freedom
## Residual deviance: 172.71  on 164  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 2431.5
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  5.912 
##           Std. Err.:  0.633 
## 
##  2 x log-likelihood:  -2421.533
# Relative change in the source IRR after adding Sex and Age:
# (IRR_adjusted - IRR_crude) / IRR_crude. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    total_number_of_contractions ~ source + Sex + Age + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    total_number_of_contractions ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] 0.01996556

Since the percent change is less than 10%, there is no confounding.

7.1.2 number_of_contractions_over26.7_per_hour - Negative Binomial - No confounding.

# Poisson fit used to check for overdispersion; exposure time enters as a log offset
model <- glm(
  total_number_of_contractions_over26.7 ~ source,
  offset = log(total_no_missing_time),
  family = poisson(link = "log"),
  data = all_data
)
# Pearson dispersion: sum of squared Pearson residuals over residual degrees of freedom
sum(residuals(model, type = "pearson")^2) / df.residual(model)
## [1] 68.88857

Since Pearson Dispersion is much larger than 1, we should use Negative Binomial, instead of Poisson Regression.

# Crude negative-binomial model: contractions over 26.7 on source,
# with log exposure time as offset
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ source +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ source + 
##     offset(log(total_no_missing_time)), data = all_data, init.theta = 1.87209386, 
##     link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        3.83078    0.08739  43.834  < 2e-16 ***
## sourceJack's Data -0.51215    0.11491  -4.457 8.31e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.8721) family taken to be 1)
## 
##     Null deviance: 203.34  on 168  degrees of freedom
## Residual deviance: 183.19  on 167  degrees of freedom
## AIC: 1903.6
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.872 
##           Std. Err.:  0.193 
## 
##  2 x log-likelihood:  -1897.586
# Same model, adjusted for Sex
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ source + Sex +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ source + 
##     Sex + offset(log(total_no_missing_time)), data = all_data, 
##     init.theta = 1.897730656, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         3.9645     0.1242  31.926  < 2e-16 ***
## sourceJack's Data  -0.5514     0.1176  -4.689 2.74e-06 ***
## SexMale            -0.1889     0.1190  -1.588    0.112    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.8977) family taken to be 1)
## 
##     Null deviance: 206.05  on 168  degrees of freedom
## Residual deviance: 183.05  on 166  degrees of freedom
## AIC: 1903
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.898 
##           Std. Err.:  0.196 
## 
##  2 x log-likelihood:  -1895.020
# Relative change in the source IRR after adding Sex:
# (IRR_adjusted - IRR_crude) / IRR_crude. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    total_number_of_contractions_over26.7 ~ source + Sex + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    total_number_of_contractions_over26.7 ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] -0.03845585
# Same model, adjusted for Age
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ source + Age +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ source + 
##     Age + offset(log(total_no_missing_time)), data = all_data, 
##     init.theta = 1.973015419, link = log)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        3.301979   0.204926  16.113  < 2e-16 ***
## sourceJack's Data -0.453430   0.115206  -3.936 8.29e-05 ***
## Age                0.010028   0.003516   2.852  0.00435 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.973) family taken to be 1)
## 
##     Null deviance: 209.27  on 167  degrees of freedom
## Residual deviance: 181.31  on 165  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 1888
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.973 
##           Std. Err.:  0.205 
## 
##  2 x log-likelihood:  -1879.990
# Relative change in the source IRR after adding Age:
# (IRR_adjusted - IRR_crude) / IRR_crude. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    total_number_of_contractions_over26.7 ~ source + Age + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    total_number_of_contractions_over26.7 ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] 0.06048331
# Same model, adjusted for both Sex and Age
summary(
  glm.nb(
    total_number_of_contractions_over26.7 ~ source + Sex + Age +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = total_number_of_contractions_over26.7 ~ source + 
##     Sex + Age + offset(log(total_no_missing_time)), data = all_data, 
##     init.theta = 1.998034502, link = log)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        3.430426   0.225307  15.226  < 2e-16 ***
## sourceJack's Data -0.490925   0.118250  -4.152  3.3e-05 ***
## SexMale           -0.177355   0.116560  -1.522  0.12812    
## Age                0.009971   0.003499   2.849  0.00438 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.998) family taken to be 1)
## 
##     Null deviance: 211.85  on 167  degrees of freedom
## Residual deviance: 181.17  on 164  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 1887.6
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.998 
##           Std. Err.:  0.208 
## 
##  2 x log-likelihood:  -1877.624
# Relative change in the source IRR after adding Sex and Age:
# (IRR_adjusted - IRR_crude) / IRR_crude. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    total_number_of_contractions_over26.7 ~ source + Sex + Age + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    total_number_of_contractions_over26.7 ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] 0.02145684

Since the percent change is less than 10%, there is no confounding.

7.1.3 number_of_total_epochs_frequency - Negative Binomial - No confounding.

# Poisson fit used to check for overdispersion; exposure time enters as a log offset
model <- glm(
  number_of_total_epochs ~ source,
  offset = log(total_no_missing_time),
  family = poisson(link = "log"),
  data = all_data
)
# Pearson dispersion: sum of squared Pearson residuals over residual degrees of freedom
sum(residuals(model, type = "pearson")^2) / df.residual(model)
## [1] 3.576141

Since the Pearson dispersion (about 3.6) is clearly larger than 1, we should use Negative Binomial, instead of Poisson Regression.

# Crude negative-binomial model: number of total epochs on source,
# with log exposure time as offset
summary(
  glm.nb(
    number_of_total_epochs ~ source +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = number_of_total_epochs ~ source + offset(log(total_no_missing_time)), 
##     data = all_data, init.theta = 4.242011971, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.08434    0.07037  15.410   <2e-16 ***
## sourceJack's Data  0.21696    0.09057   2.395   0.0166 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.242) family taken to be 1)
## 
##     Null deviance: 184.66  on 168  degrees of freedom
## Residual deviance: 178.93  on 167  degrees of freedom
## AIC: 1074.8
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.242 
##           Std. Err.:  0.665 
## 
##  2 x log-likelihood:  -1068.783
# Same model, adjusted for Sex
summary(
  glm.nb(
    number_of_total_epochs ~ source + Sex +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = number_of_total_epochs ~ source + Sex + offset(log(total_no_missing_time)), 
##     data = all_data, init.theta = 4.258986195, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.12566    0.09913  11.355   <2e-16 ***
## sourceJack's Data  0.20210    0.09326   2.167   0.0302 *  
## SexMale           -0.05403    0.09323  -0.580   0.5622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.259) family taken to be 1)
## 
##     Null deviance: 185.14  on 168  degrees of freedom
## Residual deviance: 179.06  on 166  degrees of freedom
## AIC: 1076.5
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.259 
##           Std. Err.:  0.669 
## 
##  2 x log-likelihood:  -1068.452
# Relative change in the source IRR after adding Sex:
# (IRR_adjusted - IRR_crude) / IRR_crude. The denominator is the crude IRR
# itself, i.e. a single exp() of the crude coefficient, matching the other
# percent-change checks in this section. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    number_of_total_epochs ~ source + Sex + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    number_of_total_epochs ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] -0.0148 (approximate, re-derived from the rounded coefficients printed above; re-knit for the exact value)
# Same model, adjusted for Age
summary(
  glm.nb(
    number_of_total_epochs ~ source + Age +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = number_of_total_epochs ~ source + Age + offset(log(total_no_missing_time)), 
##     data = all_data, init.theta = 4.705615718, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       0.675927   0.158405   4.267 1.98e-05 ***
## sourceJack's Data 0.287674   0.089868   3.201  0.00137 ** 
## Age               0.007539   0.002677   2.817  0.00485 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.7056) family taken to be 1)
## 
##     Null deviance: 191.29  on 167  degrees of freedom
## Residual deviance: 176.84  on 165  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 1060.6
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.706 
##           Std. Err.:  0.760 
## 
##  2 x log-likelihood:  -1052.603
# Relative change in the source IRR after adding Age:
# (IRR_adjusted - IRR_crude) / IRR_crude. The denominator is the crude IRR
# itself, i.e. a single exp() of the crude coefficient, matching the other
# percent-change checks in this section. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    number_of_total_epochs ~ source + Age + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    number_of_total_epochs ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] 0.0733 (approximate, re-derived from the rounded coefficients printed above; re-knit for the exact value)
# Same model, adjusted for both Sex and Age
summary(
  glm.nb(
    number_of_total_epochs ~ source + Sex + Age +
      offset(log(total_no_missing_time)),
    data = all_data
  )
)
## 
## Call:
## glm.nb(formula = number_of_total_epochs ~ source + Sex + Age + 
##     offset(log(total_no_missing_time)), data = all_data, init.theta = 4.712933149, 
##     link = log)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.699163   0.175250   3.990 6.62e-05 ***
## sourceJack's Data  0.279847   0.092886   3.013  0.00259 ** 
## SexMale           -0.026907   0.090414  -0.298  0.76601    
## Age                0.007489   0.002679   2.796  0.00518 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.7129) family taken to be 1)
## 
##     Null deviance: 191.47  on 167  degrees of freedom
## Residual deviance: 176.93  on 164  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 1062.5
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.713 
##           Std. Err.:  0.762 
## 
##  2 x log-likelihood:  -1052.516
# Relative change in the source IRR after adding Sex and Age:
# (IRR_adjusted - IRR_crude) / IRR_crude. The denominator is the crude IRR
# itself, i.e. a single exp() of the crude coefficient, matching the other
# percent-change checks in this section. Each model is fitted once inside
# local(), so no helper objects are left in the workspace.
local({
  # IRR of the 2nd coefficient row (the source term)
  source_irr <- function(fit) exp(summary(fit)$coefficients[2, "Estimate"])
  irr_adjusted <- source_irr(glm.nb(
    number_of_total_epochs ~ source + Sex + Age + offset(log(total_no_missing_time)),
    data = all_data
  ))
  irr_crude <- source_irr(glm.nb(
    number_of_total_epochs ~ source + offset(log(total_no_missing_time)),
    data = all_data
  ))
  (irr_adjusted - irr_crude) / irr_crude
})
## [1] 0.0649 (approximate, re-derived from the rounded coefficients printed above; re-knit for the exact value)

Since the percent change is less than 10%, there is no confounding.

7.1.4 imputed_sum_of_amplitude - ANCOVA - No confounding.

# Crude linear model: imputed sum of amplitude on source
summary(
  lm(
    imputed_sum_of_amplitude ~ source,
    data = all_data
  )
)
## 
## Call:
## lm(formula = imputed_sum_of_amplitude ~ source, data = all_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -125376  -42592  -12682   19110  763119 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         187579      11253  16.669  < 2e-16 ***
## sourceJack's Data   -79402      14778  -5.373 2.57e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 94820 on 167 degrees of freedom
## Multiple R-squared:  0.1474, Adjusted R-squared:  0.1423 
## F-statistic: 28.87 on 1 and 167 DF,  p-value: 2.566e-07
# Same linear model, adjusted for Sex
summary(
  lm(
    imputed_sum_of_amplitude ~ source + Sex,
    data = all_data
  )
)
## 
## Call:
## lm(formula = imputed_sum_of_amplitude ~ source + Sex, data = all_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -123975  -40760  -13004   18764  758996 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         191702      16127  11.887  < 2e-16 ***
## sourceJack's Data   -80707      15259  -5.289 3.83e-07 ***
## SexMale              -5524      15437  -0.358    0.721    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95070 on 166 degrees of freedom
## Multiple R-squared:  0.1481, Adjusted R-squared:  0.1378 
## F-statistic: 14.42 on 2 and 166 DF,  p-value: 1.676e-06
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex is added to the source-only model: (adjusted - crude) / crude.
local({
  crude <- summary(lm(imputed_sum_of_amplitude ~ source, data = all_data))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(imputed_sum_of_amplitude ~ source + Sex, data = all_data))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.01643617
# Age-adjusted model: imputed_sum_of_amplitude regressed on source and Age.
summary(lm(imputed_sum_of_amplitude ~ source + Age, data = all_data))
## 
## Call:
## lm(formula = imputed_sum_of_amplitude ~ source + Age, data = all_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -129893  -41744  -11400   20079  759955 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       163675.3    27104.1   6.039 9.96e-09 ***
## sourceJack's Data -76239.1    15249.5  -4.999 1.46e-06 ***
## Age                  451.1      465.1   0.970    0.333    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95110 on 165 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1523, Adjusted R-squared:  0.142 
## F-statistic: 14.82 on 2 and 165 DF,  p-value: 1.204e-06
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Age is added to the source-only model: (adjusted - crude) / crude.
# NOTE(review): the Age-adjusted summary above reports 1 observation deleted
# due to missingness, so the two estimates come from slightly different rows.
local({
  crude <- summary(lm(imputed_sum_of_amplitude ~ source, data = all_data))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(imputed_sum_of_amplitude ~ source + Age, data = all_data))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.03983097
# Fully adjusted model: imputed_sum_of_amplitude regressed on source, Sex and Age.
summary(lm(imputed_sum_of_amplitude ~ source + Sex + Age, data = all_data))
## 
## Call:
## lm(formula = imputed_sum_of_amplitude ~ source + Sex + Age, data = all_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -128539  -42259  -10907   20427  756246 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       167843.3    30068.8   5.582 9.66e-08 ***
## sourceJack's Data -77512.1    15787.8  -4.910 2.19e-06 ***
## SexMale            -5040.0    15556.0  -0.324    0.746    
## Age                  443.5      466.9   0.950    0.344    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 95370 on 164 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1528, Adjusted R-squared:  0.1373 
## F-statistic: 9.861 on 3 and 164 DF,  p-value: 5.142e-06
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex and Age are both added to the source-only model.
# NOTE(review): the adjusted summary above reports 1 observation deleted due
# to missingness, so the two estimates come from slightly different rows.
local({
  crude <- summary(lm(imputed_sum_of_amplitude ~ source, data = all_data))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(imputed_sum_of_amplitude ~ source + Sex + Age, data = all_data))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.02379869

Since the percent change is less than 10%, there is no confounding.

7.1.5 Motility Index - ANCOVA - Confounding exists, but mild.

# Unadjusted model: motility_index regressed on source only.
summary(lm(motility_index ~ source, data = all_data))
## 
## Call:
## lm(formula = motility_index ~ source, data = all_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34781 -0.48502  0.08265  0.64249  2.15321 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        18.3287     0.1064 172.283   <2e-16 ***
## sourceJack's Data  -0.3278     0.1397  -2.347   0.0201 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8964 on 167 degrees of freedom
## Multiple R-squared:  0.03192,    Adjusted R-squared:  0.02612 
## F-statistic: 5.506 on 1 and 167 DF,  p-value: 0.02012
# Sex-adjusted model: motility_index regressed on source and Sex.
summary(lm(motility_index ~ source + Sex, data = all_data))
## 
## Call:
## lm(formula = motility_index ~ source + Sex, data = all_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.33189 -0.51706  0.07227  0.61399  2.18397 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        18.3756     0.1524 120.542   <2e-16 ***
## sourceJack's Data  -0.3427     0.1442  -2.376   0.0187 *  
## SexMale            -0.0628     0.1459  -0.430   0.6675    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8986 on 166 degrees of freedom
## Multiple R-squared:  0.033,  Adjusted R-squared:  0.02135 
## F-statistic: 2.832 on 2 and 166 DF,  p-value: 0.06173
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex is added to the source-only model: (adjusted - crude) / crude.
local({
  crude <- summary(lm(motility_index ~ source, data = all_data))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(motility_index ~ source + Sex, data = all_data))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.04526403
# Age-adjusted model: motility_index regressed on source and Age.
summary(lm(motility_index ~ source + Age, data = all_data))
## 
## Call:
## lm(formula = motility_index ~ source + Age, data = all_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.29114 -0.51372  0.07746  0.63739  2.14190 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       18.128285   0.253687  71.459   <2e-16 ***
## sourceJack's Data -0.282527   0.142731  -1.979   0.0494 *  
## Age                0.003782   0.004353   0.869   0.3862    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8902 on 165 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.03364,    Adjusted R-squared:  0.02192 
## F-statistic: 2.872 on 2 and 165 DF,  p-value: 0.05944
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Age is added to the source-only model: (adjusted - crude) / crude.
# NOTE(review): the Age-adjusted summary above reports 1 observation deleted
# due to missingness, so the two estimates come from slightly different rows.
local({
  crude <- summary(lm(motility_index ~ source, data = all_data))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(motility_index ~ source + Age, data = all_data))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.1381755
# Fully adjusted model: motility_index regressed on source, Sex and Age.
summary(lm(motility_index ~ source + Sex + Age, data = all_data))
## 
## Call:
## lm(formula = motility_index ~ source + Sex + Age, data = all_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.28308 -0.50494  0.07178  0.63631  2.15909 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       18.157154   0.281476  64.507   <2e-16 ***
## sourceJack's Data -0.291344   0.147791  -1.971   0.0504 .  
## SexMale           -0.034909   0.145621  -0.240   0.8108    
## Age                0.003729   0.004371   0.853   0.3949    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8928 on 164 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.03398,    Adjusted R-squared:  0.0163 
## F-statistic: 1.923 on 3 and 164 DF,  p-value: 0.1279
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex and Age are both added to the source-only model.
# NOTE(review): the adjusted summary above reports 1 observation deleted due
# to missingness, so the two estimates come from slightly different rows.
local({
  crude <- summary(lm(motility_index ~ source, data = all_data))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(motility_index ~ source + Sex + Age, data = all_data))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.111279

Adjusting for Sex, the percent change is less than 10%, indicating that Sex is not a confounder.

Adjusting for Age, the absolute percent change (about 13.8%) is slightly larger than 10%, indicating that Age is a mild confounder.

Adjusting for Sex and Age, the absolute percent change (about 11.1%) is slightly larger than 10%, indicating that Sex and Age are mild confounders jointly.

7.1.6 median_RMSSD_Time_fast epoch - ANCOVA - Confounding exists.

# Bring Age and Sex from all_data into the RMSSD table. right_join keeps every
# row of the selected all_data columns; no `by` is supplied, so dplyr joins on
# whatever columns the two tables have in common.
# NOTE(review): presumably combined_ID is the only shared column - confirm,
# and consider passing `by` explicitly.
all_data_RMSSD <- right_join(
  all_data_RMSSD,
  dplyr::select(all_data, combined_ID, Age, Sex)
)
# Unadjusted model: median_RMSSD_Time_fast epoch regressed on source only.
summary(lm(`median_RMSSD_Time_fast epoch` ~ source, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_fast epoch` ~ source, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.596  -3.928  -1.055   3.149  18.654 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        53.1357     0.6817  77.943  < 2e-16 ***
## sourceJack's Data  -2.6704     0.8979  -2.974  0.00346 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.324 on 142 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.05863,    Adjusted R-squared:  0.052 
## F-statistic: 8.844 on 1 and 142 DF,  p-value: 0.003456
# Sex-adjusted model: median_RMSSD_Time_fast epoch regressed on source and Sex.
summary(lm(`median_RMSSD_Time_fast epoch` ~ source + Sex, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_fast epoch` ~ source + Sex, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.474  -3.858  -1.061   3.357  18.776 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        53.5081     0.9864  54.246  < 2e-16 ***
## sourceJack's Data  -2.8048     0.9361  -2.996  0.00323 ** 
## SexMale            -0.4937     0.9431  -0.523  0.60146    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.338 on 141 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.06046,    Adjusted R-squared:  0.04713 
## F-statistic: 4.537 on 2 and 141 DF,  p-value: 0.01232
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex is added to the source-only model: (adjusted - crude) / crude.
local({
  crude <- summary(lm(`median_RMSSD_Time_fast epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_fast epoch` ~ source + Sex, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.05031982
# Age-adjusted model: median_RMSSD_Time_fast epoch regressed on source and Age.
summary(lm(`median_RMSSD_Time_fast epoch` ~ source + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_fast epoch` ~ source + Age, data = all_data_RMSSD)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1641  -3.4276  -0.4935   3.0769  18.0773 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       56.70760    1.59210  35.618  < 2e-16 ***
## sourceJack's Data -3.16919    0.90496  -3.502 0.000619 ***
## Age               -0.06786    0.02744  -2.473 0.014588 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.231 on 141 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.09776,    Adjusted R-squared:  0.08497 
## F-statistic: 7.639 on 2 and 141 DF,  p-value: 0.000708
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Age is added to the source-only model: (adjusted - crude) / crude.
local({
  crude <- summary(lm(`median_RMSSD_Time_fast epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_fast epoch` ~ source + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.186767
# Fully adjusted model: median_RMSSD_Time_fast epoch regressed on source, Sex and Age.
summary(lm(`median_RMSSD_Time_fast epoch` ~ source + Sex + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_fast epoch` ~ source + Sex + 
##     Age, data = all_data_RMSSD)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.0234  -3.5671  -0.6307   3.1170  17.8599 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       57.15485    1.75910  32.491  < 2e-16 ***
## sourceJack's Data -3.32505    0.94299  -3.526 0.000571 ***
## SexMale           -0.55979    0.92670  -0.604 0.546774    
## Age               -0.06833    0.02751  -2.484 0.014182 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.243 on 140 degrees of freedom
##   (25 observations deleted due to missingness)
## Multiple R-squared:  0.1001, Adjusted R-squared:  0.08083 
## F-statistic: 5.192 on 3 and 140 DF,  p-value: 0.001977
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex and Age are both added to the source-only model.
local({
  crude <- summary(lm(`median_RMSSD_Time_fast epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_fast epoch` ~ source + Sex + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.2451344

Adjusting for Sex, the percent change is less than 10%, indicating that Sex is not a confounder.

Adjusting for Age, the percent change is larger than 10%, indicating that Age is a confounder.

Adjusting for Sex and Age, the percent change is larger than 10%, indicating that Sex and Age are confounders jointly.

7.1.7 median_RMSSD_Time_medium epoch - ANCOVA - Confounding exists, but mild.

# Unadjusted model: median_RMSSD_Time_medium epoch regressed on source only.
summary(lm(`median_RMSSD_Time_medium epoch` ~ source, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_medium epoch` ~ source, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.652  -5.543  -1.468   2.962  34.213 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         67.132      1.142  58.775   <2e-16 ***
## sourceJack's Data   -3.714      1.475  -2.519   0.0129 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.699 on 143 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.04248,    Adjusted R-squared:  0.03578 
## F-statistic: 6.343 on 1 and 143 DF,  p-value: 0.01288
# Sex-adjusted model: median_RMSSD_Time_medium epoch regressed on source and Sex.
summary(lm(`median_RMSSD_Time_medium epoch` ~ source + Sex, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_medium epoch` ~ source + Sex, 
##     data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.874  -5.493  -1.413   2.527  33.991 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        66.4941     1.6069  41.380   <2e-16 ***
## sourceJack's Data  -3.5012     1.5251  -2.296   0.0232 *  
## SexMale             0.8602     1.5209   0.566   0.5726    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.719 on 142 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.04463,    Adjusted R-squared:  0.03117 
## F-statistic: 3.317 on 2 and 142 DF,  p-value: 0.03911
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex is added to the source-only model: (adjusted - crude) / crude.
# The models are now fitted on all_data_RMSSD itself, matching the summaries
# displayed above and the other sections; the previous
# filter(source %in% c("EVA Data", "Jack's Data")) was inconsistent with them.
# The coefficients shown in those summaries reproduce the printed value.
local({
  crude <- summary(lm(`median_RMSSD_Time_medium epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_medium epoch` ~ source + Sex, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.05724126
# Age-adjusted model: median_RMSSD_Time_medium epoch regressed on source and Age.
summary(lm(`median_RMSSD_Time_medium epoch` ~ source + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_medium epoch` ~ source + Age, 
##     data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.687  -5.989  -1.408   2.881  33.958 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       65.43632    2.70500  24.191   <2e-16 ***
## sourceJack's Data -3.46153    1.52161  -2.275   0.0244 *  
## Age                0.03147    0.04549   0.692   0.4902    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.715 on 142 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.04569,    Adjusted R-squared:  0.03225 
## F-statistic: 3.399 on 2 and 142 DF,  p-value: 0.03613
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Age is added to the source-only model: (adjusted - crude) / crude.
# The models are now fitted on all_data_RMSSD itself, matching the summaries
# displayed above and the other sections; the previous
# filter(source %in% c("EVA Data", "Jack's Data")) was inconsistent with them.
# The coefficients shown in those summaries reproduce the printed value.
local({
  crude <- summary(lm(`median_RMSSD_Time_medium epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_medium epoch` ~ source + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.06793226
# Fully adjusted model: median_RMSSD_Time_medium epoch regressed on source, Sex and Age.
summary(lm(`median_RMSSD_Time_medium epoch` ~ source + Sex + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_medium epoch` ~ source + Sex + 
##     Age, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.944  -5.606  -1.559   2.703  33.680 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       64.54611    3.04693  21.184   <2e-16 ***
## sourceJack's Data -3.19503    1.58066  -2.021   0.0451 *  
## SexMale            0.97979    1.53148   0.640   0.5234    
## Age                0.03451    0.04583   0.753   0.4528    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.733 on 141 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.04845,    Adjusted R-squared:  0.02821 
## F-statistic: 2.393 on 3 and 141 DF,  p-value: 0.07102
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex and Age are both added to the source-only model.
# The models are now fitted on all_data_RMSSD itself, matching the summaries
# displayed above and the other sections; the previous
# filter(source %in% c("EVA Data", "Jack's Data")) was inconsistent with them.
# The coefficients shown in those summaries reproduce the printed value.
local({
  crude <- summary(lm(`median_RMSSD_Time_medium epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_medium epoch` ~ source + Sex + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.1396935

Adjusting for Sex, the percent change is less than 10%, indicating that Sex is not a confounder.

Adjusting for Age, the absolute percent change (about 6.8%) is less than 10%, indicating that Age alone is not a confounder.

Adjusting for Sex and Age, the absolute percent change (about 14.0%) is larger than 10%, indicating that Sex and Age are mild confounders jointly.

This means that neither variable alone is a confounder, but together they confound the association jointly, though only mildly.

7.1.8 median_RMSSD_Time_slow epoch - ANCOVA - No confounding.

# Unadjusted model: median_RMSSD_Time_slow epoch regressed on source only.
summary(lm(`median_RMSSD_Time_slow epoch` ~ source, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_slow epoch` ~ source, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.258  -7.843  -1.263   5.857  54.354 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         83.876      1.484  56.535  < 2e-16 ***
## sourceJack's Data   -5.417      1.909  -2.838  0.00518 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.4 on 147 degrees of freedom
##   (20 observations deleted due to missingness)
## Multiple R-squared:  0.05194,    Adjusted R-squared:  0.04549 
## F-statistic: 8.054 on 1 and 147 DF,  p-value: 0.005184
# Sex-adjusted model: median_RMSSD_Time_slow epoch regressed on source and Sex.
summary(lm(`median_RMSSD_Time_slow epoch` ~ source + Sex, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_slow epoch` ~ source + Sex, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.439  -7.741  -1.271   5.959  54.271 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        83.5817     2.1573  38.744  < 2e-16 ***
## sourceJack's Data  -5.3202     1.9834  -2.682  0.00815 ** 
## SexMale             0.3772     2.0027   0.188  0.85087    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.43 on 146 degrees of freedom
##   (20 observations deleted due to missingness)
## Multiple R-squared:  0.05217,    Adjusted R-squared:  0.03919 
## F-statistic: 4.018 on 2 and 146 DF,  p-value: 0.02001
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex is added to the source-only model: (adjusted - crude) / crude.
local({
  crude <- summary(lm(`median_RMSSD_Time_slow epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_slow epoch` ~ source + Sex, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.01792457
# Age-adjusted model: median_RMSSD_Time_slow epoch regressed on source and Age.
summary(lm(`median_RMSSD_Time_slow epoch` ~ source + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_slow epoch` ~ source + Age, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.769  -7.744  -1.401   6.113  53.276 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       87.69717    3.38215  25.929  < 2e-16 ***
## sourceJack's Data -5.67444    1.93412  -2.934  0.00389 ** 
## Age               -0.07219    0.05754  -1.255  0.21162    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.29 on 145 degrees of freedom
##   (21 observations deleted due to missingness)
## Multiple R-squared:  0.05895,    Adjusted R-squared:  0.04597 
## F-statistic: 4.542 on 2 and 145 DF,  p-value: 0.01222
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Age is added to the source-only model: (adjusted - crude) / crude.
# NOTE(review): the Age-adjusted summary above reports 21 observations deleted
# due to missingness versus 20 for the source-only model, so the two estimates
# come from slightly different rows.
local({
  crude <- summary(lm(`median_RMSSD_Time_slow epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_slow epoch` ~ source + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.04746281
# Fully adjusted model: median_RMSSD_Time_slow epoch regressed on source, Sex and Age.
summary(lm(`median_RMSSD_Time_slow epoch` ~ source + Sex + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = `median_RMSSD_Time_slow epoch` ~ source + Sex + 
##     Age, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.053  -7.892  -1.287   6.335  53.168 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       87.22760    3.80111  22.948  < 2e-16 ***
## sourceJack's Data -5.52536    2.01514  -2.742  0.00688 ** 
## SexMale            0.54657    1.99447   0.274  0.78445    
## Age               -0.07137    0.05780  -1.235  0.21893    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.33 on 144 degrees of freedom
##   (21 observations deleted due to missingness)
## Multiple R-squared:  0.05944,    Adjusted R-squared:  0.03984 
## F-statistic: 3.033 on 3 and 144 DF,  p-value: 0.03129
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex and Age are both added to the source-only model.
# NOTE(review): the adjusted summary above reports 21 observations deleted due
# to missingness versus 20 for the source-only model, so the two estimates
# come from slightly different rows.
local({
  crude <- summary(lm(`median_RMSSD_Time_slow epoch` ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(`median_RMSSD_Time_slow epoch` ~ source + Sex + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] 0.01994353

Since the percent change is less than 10%, there is no confounding.

7.1.9 median_RMSSD_Magnitude_total_epoch - ANCOVA - Confounding exists.

# Unadjusted model: median_RMSSD_Magnitude_total_epoch regressed on source only.
summary(lm(median_RMSSD_Magnitude_total_epoch ~ source, data = all_data_RMSSD))
## 
## Call:
## lm(formula = median_RMSSD_Magnitude_total_epoch ~ source, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.822 -14.598  -4.507   8.362  84.012 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         59.947      2.699  22.208   <2e-16 ***
## sourceJack's Data   -7.930      3.545  -2.237   0.0266 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.74 on 167 degrees of freedom
## Multiple R-squared:  0.02909,    Adjusted R-squared:  0.02328 
## F-statistic: 5.004 on 1 and 167 DF,  p-value: 0.02661
# Sex-adjusted model: median_RMSSD_Magnitude_total_epoch regressed on source and Sex.
summary(lm(median_RMSSD_Magnitude_total_epoch ~ source + Sex, data = all_data_RMSSD))
## 
## Call:
## lm(formula = median_RMSSD_Magnitude_total_epoch ~ source + Sex, 
##     data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -37.766 -14.559  -4.527   8.401  83.975 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       59.89060    3.86997  15.476   <2e-16 ***
## sourceJack's Data -7.91177    3.66154  -2.161   0.0321 *  
## SexMale            0.07598    3.70438   0.021   0.9837    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.81 on 166 degrees of freedom
## Multiple R-squared:  0.0291, Adjusted R-squared:  0.0174 
## F-statistic: 2.487 on 2 and 166 DF,  p-value: 0.08622
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex is added to the source-only model: (adjusted - crude) / crude.
local({
  crude <- summary(lm(median_RMSSD_Magnitude_total_epoch ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(median_RMSSD_Magnitude_total_epoch ~ source + Sex, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.00226397
# Age-adjusted model: median_RMSSD_Magnitude_total_epoch regressed on source and Age.
summary(lm(median_RMSSD_Magnitude_total_epoch ~ source + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = median_RMSSD_Magnitude_total_epoch ~ source + Age, 
##     data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.717 -14.703  -4.146   9.554  76.520 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        44.4576     6.3534   6.997 6.22e-11 ***
## sourceJack's Data  -5.4734     3.5746  -1.531  0.12763    
## Age                 0.2923     0.1090   2.682  0.00807 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.3 on 165 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06782,    Adjusted R-squared:  0.05652 
## F-statistic: 6.002 on 2 and 165 DF,  p-value: 0.003047
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Age is added to the source-only model: (adjusted - crude) / crude.
# NOTE(review): the Age-adjusted summary above reports 1 observation deleted
# due to missingness, so the two estimates come from slightly different rows.
local({
  crude <- summary(lm(median_RMSSD_Magnitude_total_epoch ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(median_RMSSD_Magnitude_total_epoch ~ source + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.3097576
# Fully adjusted model: median_RMSSD_Magnitude_total_epoch regressed on source, Sex and Age.
summary(lm(median_RMSSD_Magnitude_total_epoch ~ source + Sex + Age, data = all_data_RMSSD))
## 
## Call:
## lm(formula = median_RMSSD_Magnitude_total_epoch ~ source + Sex + 
##     Age, data = all_data_RMSSD)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.030 -14.437  -4.090   9.522  76.267 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        43.6788     7.0492   6.196  4.5e-09 ***
## sourceJack's Data  -5.2355     3.7012  -1.415  0.15910    
## SexMale             0.9418     3.6469   0.258  0.79653    
## Age                 0.2938     0.1095   2.684  0.00803 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.36 on 164 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0682, Adjusted R-squared:  0.05115 
## F-statistic: 4.001 on 3 and 164 DF,  p-value: 0.008809
# Relative change in the source coefficient (row 2 of the coefficient table)
# when Sex and Age are both added to the source-only model.
# NOTE(review): the adjusted summary above reports 1 observation deleted due
# to missingness, so the two estimates come from slightly different rows.
local({
  crude <- summary(lm(median_RMSSD_Magnitude_total_epoch ~ source, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  adjusted <- summary(lm(median_RMSSD_Magnitude_total_epoch ~ source + Sex + Age, data = all_data_RMSSD))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.3397569

Adjusting for Sex, the percent change is less than 10%, indicating that Sex is not a confounder.

Adjusting for Age, the absolute percent change (about 31.0%) is larger than 10%, indicating that Age is a confounder.

Adjusting for Sex and Age, the absolute percent change (about 34.0%) is larger than 10%, indicating that Sex and Age are confounders jointly.

7.1.10 total_contraction_rate_over10 - Quasi-Binomial - Confounding exists, but mild.

# Plain binomial fit of the two-column response
# (rate * minutes, minutes - rate * minutes) on source, kept in `model`.
model <- glm(
  cbind(
    total_contraction_rate_over10 * total_minute_contraction,
    total_minute_contraction - total_contraction_rate_over10 * total_minute_contraction
  ) ~ source,
  family = binomial(),
  data = all_data
)
# Pearson dispersion: sum of squared Pearson residuals over the residual
# degrees of freedom.
sum(residuals(model, type = "pearson")^2) / df.residual(model)
## [1] 74.59594

Since the Pearson dispersion statistic is much larger than 1, the data are overdispersed, so we use quasi-binomial regression instead of binomial regression.

# Unadjusted quasi-binomial model: the two-column response
# (rate * minutes, minutes - rate * minutes) regressed on source only.
summary(glm(cbind(total_contraction_rate_over10 * total_minute_contraction, total_minute_contraction - total_contraction_rate_over10*total_minute_contraction) ~ source, family = quasibinomial(), data = all_data))
## 
## Call:
## glm(formula = cbind(total_contraction_rate_over10 * total_minute_contraction, 
##     total_minute_contraction - total_contraction_rate_over10 * 
##         total_minute_contraction) ~ source, family = quasibinomial(), 
##     data = all_data)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -3.5965     0.2011 -17.888  < 2e-16 ***
## sourceJack's Data   1.2033     0.2929   4.108 6.23e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 74.59594)
## 
##     Null deviance: 9086.6  on 168  degrees of freedom
## Residual deviance: 7897.1  on 167  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7
# Sex-adjusted quasi-binomial model: same two-column response regressed on
# source and Sex.
summary(glm(cbind(total_contraction_rate_over10 * total_minute_contraction, total_minute_contraction - total_contraction_rate_over10*total_minute_contraction) ~ source + Sex, family = quasibinomial(), data = all_data))
## 
## Call:
## glm(formula = cbind(total_contraction_rate_over10 * total_minute_contraction, 
##     total_minute_contraction - total_contraction_rate_over10 * 
##         total_minute_contraction) ~ source + Sex, family = quasibinomial(), 
##     data = all_data)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -4.1466     0.2395 -17.313  < 2e-16 ***
## sourceJack's Data   1.0192     0.2565   3.973 0.000106 ***
## SexMale             1.1674     0.2718   4.295 2.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 55.22838)
## 
##     Null deviance: 9086.6  on 168  degrees of freedom
## Residual deviance: 6795.8  on 166  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7
# Relative change in the source coefficient (row 2 of the coefficient table)
# of the quasi-binomial model when Sex is added: (adjusted - crude) / crude.
local({
  crude <- summary(glm(
    cbind(
      total_contraction_rate_over10 * total_minute_contraction,
      total_minute_contraction - total_contraction_rate_over10 * total_minute_contraction
    ) ~ source,
    family = quasibinomial(),
    data = all_data
  ))$coefficients[2, "Estimate"]
  adjusted <- summary(glm(
    cbind(
      total_contraction_rate_over10 * total_minute_contraction,
      total_minute_contraction - total_contraction_rate_over10 * total_minute_contraction
    ) ~ source + Sex,
    family = quasibinomial(),
    data = all_data
  ))$coefficients[2, "Estimate"]
  (adjusted - crude) / crude
})
## [1] -0.1530151
# Age-adjusted quasi-binomial model: same two-column response regressed on
# source and Age.
summary(glm(cbind(total_contraction_rate_over10 * total_minute_contraction, total_minute_contraction - total_contraction_rate_over10*total_minute_contraction) ~ source + Age, family = quasibinomial(), data = all_data))
## 
## Call:
## glm(formula = cbind(total_contraction_rate_over10 * total_minute_contraction, 
##     total_minute_contraction - total_contraction_rate_over10 * 
##         total_minute_contraction) ~ source + Age, family = quasibinomial(), 
##     data = all_data)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -3.252373   0.613289  -5.303 3.61e-07 ***
## sourceJack's Data  1.143346   0.319005   3.584 0.000445 ***
## Age               -0.006221   0.010519  -0.591 0.555048    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 74.39515)
## 
##     Null deviance: 9068.6  on 167  degrees of freedom
## Residual deviance: 7832.0  on 165  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7
# Change-in-estimate check for Age: relative change in the `source` estimate
# (row 2 of the coefficient table) when Age is added to the crude
# quasibinomial model. Wrapped in local() so no helper objects are left in the
# workspace.
# NOTE(review): the Age-adjusted model summary reports one observation deleted
# due to missingness, so the crude and adjusted fits appear to use different
# row sets - consider refitting the crude model on the same complete cases.
local({
  # Fit one quasibinomial model on all_data and return its row-2 estimate.
  source_estimate <- function(model_formula) {
    fit <- glm(model_formula, family = quasibinomial(), data = all_data)
    summary(fit)$coefficients[2, "Estimate"]
  }

  crude <- source_estimate(
    cbind(
      total_contraction_rate_over10 * total_minute_contraction,
      total_minute_contraction -
        total_contraction_rate_over10 * total_minute_contraction
    ) ~ source
  )
  adjusted <- source_estimate(
    cbind(
      total_contraction_rate_over10 * total_minute_contraction,
      total_minute_contraction -
        total_contraction_rate_over10 * total_minute_contraction
    ) ~ source + Age
  )

  (adjusted - crude) / crude
})
## [1] -0.04980738
# Quasibinomial model with `source`, Sex and Age as predictors. The two-column
# response is rate * minutes ("successes") and the remaining minutes
# ("failures").
glm(
  cbind(
    total_contraction_rate_over10 * total_minute_contraction,
    total_minute_contraction -
      total_contraction_rate_over10 * total_minute_contraction
  ) ~ source + Sex + Age,
  family = quasibinomial(),
  data = all_data
) %>%
  summary()
## 
## Call:
## glm(formula = cbind(total_contraction_rate_over10 * total_minute_contraction, 
##     total_minute_contraction - total_contraction_rate_over10 * 
##         total_minute_contraction) ~ source + Sex + Age, family = quasibinomial(), 
##     data = all_data)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -4.376397   0.565896  -7.734 1.00e-12 ***
## sourceJack's Data  1.061534   0.264569   4.012 9.12e-05 ***
## SexMale            1.206556   0.281689   4.283 3.13e-05 ***
## Age                0.003921   0.008951   0.438    0.662    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 55.89491)
## 
##     Null deviance: 9068.6  on 167  degrees of freedom
## Residual deviance: 6726.3  on 164  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7
# Change-in-estimate check for Sex and Age together: relative change in the
# `source` estimate (row 2 of the coefficient table) when both covariates are
# added to the crude quasibinomial model. Wrapped in local() so no helper
# objects are left in the workspace.
# NOTE(review): the fully adjusted model summary reports one observation
# deleted due to missingness, so the crude and adjusted fits appear to use
# different row sets - consider refitting the crude model on the same complete
# cases.
local({
  # Fit one quasibinomial model on all_data and return its row-2 estimate.
  source_estimate <- function(model_formula) {
    fit <- glm(model_formula, family = quasibinomial(), data = all_data)
    summary(fit)$coefficients[2, "Estimate"]
  }

  crude <- source_estimate(
    cbind(
      total_contraction_rate_over10 * total_minute_contraction,
      total_minute_contraction -
        total_contraction_rate_over10 * total_minute_contraction
    ) ~ source
  )
  adjusted <- source_estimate(
    cbind(
      total_contraction_rate_over10 * total_minute_contraction,
      total_minute_contraction -
        total_contraction_rate_over10 * total_minute_contraction
    ) ~ source + Sex + Age
  )

  (adjusted - crude) / crude
})
## [1] -0.1177983

Adjusting for Sex changes the source coefficient by about 15.3% (more than 10%), indicating that Sex is a confounder.

Adjusting for Age changes the source coefficient by about 5.0% (less than 10%), indicating that Age is not a confounder on its own.

Adjusting for both Sex and Age changes the source coefficient by about 11.8% (more than 10%), indicating that Sex and Age jointly act as mild confounders.

7.2 Balance the data

# Inspect Age and Sex against `source` before balancing the groups.
# NOTE(review): Plot_conditioning() is defined outside this section; what it
# draws is inferred from the call and its name - confirm against its definition.
Plot_conditioning(all_data, c("Age", "Sex"), "source")

library(forcats)
# Propensity model: logistic regression of group membership on Age and Sex.
# source_cat is 1 for "EVA Patient" and 0 otherwise, so the fitted
# probabilities are P(source == "EVA Patient" | Age, Sex). Rows with a missing
# value in any of the three modelling columns are dropped before fitting.
mod <- glm(
  source_cat ~ Age + Sex,
  family = binomial(link = "logit"),
  data = all_data %>%
    ungroup() %>%
    mutate(source_cat = ifelse(source == "EVA Patient", 1, 0)) %>%
    dplyr::select(source_cat, Age, Sex) %>%
    na.omit()
)
summary(mod)
## 
## Call:
## glm(formula = source_cat ~ Age + Sex, family = binomial(link = "logit"), 
##     data = all_data %>% ungroup() %>% mutate(source_cat = ifelse(source == 
##         "EVA Patient", 1, 0)) %>% dplyr::select(source_cat, Age, 
##         Sex) %>% na.omit())
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.55444    0.62271  -4.102 4.09e-05 ***
## Age          0.03119    0.01060   2.941  0.00327 ** 
## SexMale      1.11607    0.35148   3.175  0.00150 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 228.86  on 167  degrees of freedom
## Residual deviance: 209.39  on 165  degrees of freedom
## AIC: 215.39
## 
## Number of Fisher Scoring iterations: 4
# all_data %>%
#   ungroup() %>%
#   select(source, Age, Sex) %>%
#   na.omit() %>%
#   mutate(p = as.vector(predict(mod, type = "response"))) %>%
#   ggplot() +
#   geom_histogram(aes(x = p, fill = source), alpha = 0.3)

7.2.1 Propensity Score Matching

library(MatchIt)
# Analysis set for matching: every column of all_data (source, Age and Sex
# moved to the front), restricted to rows where those three are all observed.
df <- all_data %>%
  ungroup() %>%
  dplyr::select(source, Age, Sex, everything()) %>%
  tidyr::drop_na(source, Age, Sex)

# Score the rows of df explicitly with the fitted propensity model `mod`.
# Passing newdata ties each probability to its own row instead of relying on
# the fitted values of `mod` happening to be in the same row order as df.
# NOTE(review): `mod` was fit with source_cat = 1 for "EVA Patient", so p is
# P(source == "EVA Patient"); confirm this is the group matchit() should treat
# as "treated".
df$p <- as.vector(predict(mod, newdata = df, type = "response"))

# matchit() needs a two-level treatment indicator.
df$source <- as.factor(df$source)

# Nearest-neighbour matching of the two `source` groups without replacement,
# using the pre-computed score df$p as the matching distance.
# NOTE(review): `mod` models P(source == "EVA Patient"), yet the balance
# summary shows a lower mean distance in the treated group than in the control
# group, which suggests matchit() treats the other level as treated - confirm
# that the supplied score points in the intended direction.
m.out <- matchit(
  source ~ Age + Sex,
  data        = df,
  method      = "nearest",
  estimand    = "ATT",
  distance    = df$p,      # externally supplied distance: fitted probabilities from `mod`
  discard     = "both",    # drop units from either group that fall outside common support
  caliper     = 0.2,
  std.caliper = TRUE,
  replace     = FALSE
)
# Balance before and after matching, plus matched/unmatched/discarded counts.
summary(m.out)
## 
## Call:
## matchit(formula = source ~ Age + Sex, data = df, method = "nearest", 
##     distance = df$p, estimand = "ATT", discard = "both", replace = FALSE, 
##     caliper = 0.2, std.caliper = TRUE)
## 
## Summary of Balance for All Data:
##           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance         0.3761        0.4862         -0.6631     1.3893    0.1996
## Age             45.5773       52.9859         -0.4055     2.3896    0.1516
## SexFemale        0.4948        0.2535          0.4827          .    0.2413
## SexMale          0.5052        0.7465         -0.4827          .    0.2413
##           eCDF Max
## distance    0.3922
## Age         0.3279
## SexFemale   0.2413
## SexMale     0.2413
## 
## Summary of Balance for Matched Data:
##           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance         0.4518        0.4399          0.0713     0.8746    0.0272
## Age             50.6042       51.0417         -0.0239     1.7403    0.0618
## SexFemale        0.3125        0.3750         -0.1250          .    0.0625
## SexMale          0.6875        0.6250          0.1250          .    0.0625
##           eCDF Max Std. Pair Dist.
## distance    0.1250          0.0749
## Age         0.1458          0.4937
## SexFemale   0.0625          0.4584
## SexMale     0.0625          0.4584
## 
## Sample Sizes:
##           Control Treated
## All            71      97
## Matched        48      48
## Unmatched      23      38
## Discarded       0      11
library(cobalt)

# Covariate balance before (Diff.Un) and after (Diff.Adj) matching, with
# s.d.denom = "pooled". Mean differences are flagged at a threshold of 0.10
# and variance ratios at a threshold of 2.
# NOTE(review): the Sex_Male row equals the difference in group proportions
# reported by summary(m.out) (0.5052 - 0.7465), i.e. a raw difference rather
# than a standardized one - confirm whether `binary = "std"` is wanted so the
# 0.10 threshold is applied to an SMD for Sex as well.
bt <- bal.tab(
  m.out,
  un = TRUE,
  s.d.denom = "pooled",
  m.threshold = 0.10,
  v.threshold = 2
)

# Keep whichever mean-difference columns this cobalt version provides
# (Diff.Un / Diff.Adj in the output shown here) and print them to 3 decimals.
tab <- as.data.frame(bt[["Balance"]])
smds <- tab[, intersect(c("Diff.Un", "Diff.Adj", "M.Raw", "M.Adj"), names(tab))]
round(smds, digits = 3)
##          Diff.Un Diff.Adj
## distance  -0.715    0.077
## Age       -0.481   -0.028
## Sex_Male  -0.241    0.062

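Before propensity score matching, the differences between groups for Age (standardized mean difference, SMD = -0.481) and Sex (-0.241) exceed 0.1 in absolute value, indicating imbalance. After propensity score matching, both differences are below 0.1 in absolute value (-0.028 and 0.062), so the imbalance in Age and Sex has been largely resolved. Note that the value shown for Sex in this table is the raw difference in proportions; the standardized difference for Sex reported in the MatchIt summary is 0.125 after matching, slightly above the 0.1 threshold.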

# matched_all_data <- all_data %>% 
#   ungroup() %>% 
#   dplyr::select(source, Age, Sex, total_contractions_frequency) %>% 
#   na.omit() %>%
#   mutate(p = as.vector(predict(mod, type = "response"))) %>%
#   filter(p > 0.3 & p < 0.6)

# Matched analysis set: the data returned by match.data() for m.out, keeping
# only rows with a positive matching weight.
matched_all_data <- filter(
  match.data(m.out),
  weights > 0
)